diff --git a/.gitignore b/.gitignore
index 3288e17f..bfab4c26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+bd2k-extras/
 pimmuno.py
 pimmuno_2.py
 *.pyc
@@ -5,6 +6,7 @@ pimmuno_2.py
 develop_data/
 venv/
 .cache/
+jobStore/
 test-report.xml
 __pycache__
 *.DONE
diff --git a/MANUAL.md b/MANUAL.md
index b023baad..2ef4b1eb 100644
--- a/MANUAL.md
+++ b/MANUAL.md
@@ -27,87 +27,55 @@ ProTECT is implemented in the [Toil](https://github.com/BD2KGenomics/toil.git) f
 runs the workflow described in [protect/Flowchart.txt](
 https://github.com/BD2KGenomics/protect/blob/master/Flowchart.txt).
 
+**This manual has been lightly adapted for the py3 port of ProTECT.**
+
 # Installation
 
 ProTECT requires Toil and we recommend installing ProTECT and its requirements in a
 [virtualenv](http://docs.python-guide.org/en/latest/dev/virtualenvs/).
 
-ProTECT also requires [s3am](https://github.com/BD2KGenomics/s3am.git) version 2.0.1 to download and
+~ProTECT also requires [s3am](https://github.com/BD2KGenomics/s3am.git) version 2.0.1 to download and
 upload files from S3. We recommend installing s3am in its own virtualenv using the directions in
 the s3am manual, then putting the s3am binary on your $PATH. ProTECT will NOT attempt to install
-s3am during installation.
+s3am during installation.~
 
-ProTECT uses pkg_resources from setuptools to verify versions of tools during install. As of setuptools
-39.0.1, some modules were moved to the packaging module. If your machine has setuptools >=39.0.1, you
-will need the packaging module.
+Currently a WIP: for now, **only references to local files will work**. Anything that requires
+access to s3am (S3 buckets) will **fail**.
 
 Lastly, ProTECT uses [docker](https://www.docker.com/) to run the various sub-tools in a
 reproducible, platform independent manner. ProTECT will NOT attempt to install docker during
 installation.
 
-### Method 1 - Using PIP (recommended)
-
-First create a virtualenv at your desired location (Here we create it in the folder ~/venvs)
-
-    virtualenv ~/venvs/protect
-
-Activate the virtualenv
-
-    source ~/venvs/protect/bin/activate
-
-NOTE: Installation was tested using pip 7.1.2 and 8.1.1. We have seen issues with the installation
-of pyYAML with lower versions of pip and recommend upgrading pip before installing ProTECT.
-
-    pip install --upgrade pip
-
-Install Toil
-
-    pip install toil[aws]==3.5.2
-
-Install packaging (required if setuptools>=39.0.1)
-
-    pip install packaging
-
-Install ProTECT and all dependencies in the virtualenv
-
-    pip install protect
-
+~Method 1 - Using PIP (recommended)~
 ### Method 2 - Installing from Source
 
 This will install ProTECT in an editable mode.
 
 Obtain the source from Github
 
-    git clone https://www.github.com/BD2KGenomics/protect.git
+    git clone https://www.github.com/Dranion/protect.git
 
 Create and activate a virtualenv in the project folder (Important since the Makefile checks for
 this and will fail if it detects that you are not in a virtual environment)
 
     cd protect
-    virtualenv venv
+    virtualenv --python=python3 venv
     source venv/bin/activate
 
 Install Toil and pytest
 
     make prepare
 
-Install packaging (required if setuptools>=39.0.1)
+Install the python3 conversions of bd2k-python-lib and s3am. *s3am is untested as I am running locally.*
 
-    pip install packaging
+    make special_install
 
 Install ProTECT
 
     make develop
 
-## Method 3 - Using Docker
+~Method 3 - Using Docker~
 
-Dockerized versions of ProTECT releases can be found at https://quay.io/organization/ucsc_cgl. These
-Docker containers run the ProTECT pipeline in single machine mode. The only difference between the
-Docker and Python versions of the pipeline is that the Docker container takes the config options,
-described below, as command line arguments as opposed to a config file. Running the container
-without any arguments will list all the available options. Also, currently the dockerized version of
-ProTECT only supports local file export.
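NOTE (py3 port aside): the virtualenv check that the Makefile performs (see the `check_venv` change in the Makefile diff below) had to change for Python 3, because the stdlib `venv` module marks an active environment differently than classic `virtualenv`. A minimal sketch of the detection logic, not the Makefile's exact invocation:

```python
import sys

def in_virtualenv():
    # Classic virtualenv records the original interpreter in sys.real_prefix;
    # the stdlib venv module (python3 -m venv) instead leaves sys.base_prefix
    # pointing at the base interpreter while sys.prefix points at the env.
    return hasattr(sys, 'real_prefix') or (
        hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)

if not in_virtualenv():
    sys.exit('A virtualenv must be active.')
```

The Makefile additionally falls back to testing the `VIRTUAL_ENV` environment variable, covering shells where only the activation script ran.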
 
 # Running ProTECT
 
@@ -173,7 +141,7 @@ in the pipeline, and the information on the input samples.
 Elements before a `:` dictionary read into ProTECT and should **NOT** be modified (Barring the
 patient ID key in the patients dictionary). Only values to the right of the `:` should be edited.
 
-Every required reference file is provided in the AWS bucket `cgl-pipeline-inputs` under the folder
+Every required reference file is provided in the AWS bucket `protect-data` under the folder
 `protect/hg19_references` or `protect/hg38_references`. The `README` file in the same location
 describes in detail how each file was generated. To use a file located in an s3 bucket, replace
 `/path/to` in the following descriptions with `s3:///`.
@@ -547,7 +515,7 @@ purposes:
     12: g/f/jobO4yiE4 return self.run(fileStore)
     13: g/f/jobO4yiE4 File "/home/ucsc/arjun/tools/dev/toil_clean/src/toil/job.py", line 1406, in run
     14: g/f/jobO4yiE4 rValue = userFunction(*((self,) + tuple(self._args)), **self._kwargs)
-    15: g/f/jobO4yiE4 File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python2.7/site-packages/protect/binding_prediction/common.py", line 566, in merge_mhc_peptide_calls
+    15: g/f/jobO4yiE4 File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python3/site-packages/protect/binding_prediction/common.py", line 566, in merge_mhc_peptide_calls
     16: g/f/jobO4yiE4 raise RuntimeError('No peptides available for ranking')
     17: g/f/jobO4yiE4 RuntimeError: No peptides available for ranking
     18: g/f/jobO4yiE4 ERROR:toil.worker:Exiting the worker because of a failed job on host sjcb10st7
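NOTE (py3 port aside): both this traceback and the RADIA one below bottom out in ProTECT's `docker_call` helper (`protect/common.py`, line 138 in the trace), which raises `RuntimeError` whenever a container exits non-zero. Conceptually the failure path reduces to the sketch below — a paraphrase for orientation, not the module's actual code:

```python
import subprocess

def docker_call(tool, parameters, work_dir):
    # Mount the work dir into the container and run the tool; a non-zero
    # exit status becomes the RuntimeError seen in these tracebacks.
    call = ['docker', 'run', '--rm=true', '-v', '%s:/data' % work_dir,
            '--log-driver=none', tool] + parameters
    return_value = subprocess.call(call)
    if return_value != 0:
        raise RuntimeError('docker command returned a non-zero exit status '
                           '(%s) for command "%s"' % (return_value, ' '.join(call)))
```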
@@ -581,9 +549,9 @@ do not store logs from tools (see BD2KGenomics/protect#275). The error looks similar to:
     Z/O/job1uH92D return self.run(fileStore)
     Z/O/job1uH92D File "/home/ucsc/arjun/tools/dev/toil_clean/src/toil/job.py", line 1406, in run
     Z/O/job1uH92D rValue = userFunction(*((self,) + tuple(self._args)), **self._kwargs)
-    Z/O/job1uH92D File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python2.7/site-packages/protect/mutation_calling/radia.py", line 238, in run_filter_radia
+    Z/O/job1uH92D File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python3/site-packages/protect/mutation_calling/radia.py", line 238, in run_filter_radia
     Z/O/job1uH92D tool_version=radia_options['version'])
-    Z/O/job1uH92D File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python2.7/site-packages/protect/common.py", line 138, in docker_call
+    Z/O/job1uH92D File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python3/site-packages/protect/common.py", line 138, in docker_call
     Z/O/job1uH92D 'for command \"%s\"' % ' '.join(call),)
     Z/O/job1uH92D RuntimeError: docker command returned a non-zero exit status (1)for command "docker run --rm=true -v /scratch/bio/ucsc/toil-681c097c-61da-4687-b734-c5051f0aa19f/tmped2fnu/f041f939-5c0d-40be-a884-68635e929d09:/data --log-driver=none aarjunrao/filterradia:bcda721fc1f9c28d8b9224c2f95c440759cd3a03 TCGA-CH-5788 17 /data/radia.vcf /data /home/radia/scripts -d /data/radia_dbsnp -r /data/radia_retrogenes -p /data/radia_pseudogenes -c /data/radia_cosmic -t /data/radia_gencode --noSnpEff --noBlacklist --noTargets --noRnaBlacklist -f /data/hg38.fa --log=INFO -g /data/radia_filtered_chr17_radia.log"
     Z/O/job1uH92D ERROR:toil.worker:Exiting the worker because of a failed job on host sjcb10st1
diff --git a/Makefile b/Makefile
old mode 100644
new mode 100755
index a5a43cb2..e2e8f2d2
--- a/Makefile
+++ b/Makefile
@@ -45,17 +45,22 @@ help:
	@echo "$$help"
 
 
-python=python2.7
-pip=pip2.7
+python=python
+pip=pip
 tests=src/protect/test/unit
 extras=
-
 green=\033[0;32m
 normal=\033[0m
 red=\033[0;31m
+# WIP
+special_install: check_venv
+	git clone https://github.com/Dranion/bd2k-extras.git
+	make -C bd2k-extras/bd2k-python-lib develop
+	make -C bd2k-extras/s3am develop
+
 prepare: check_venv
-	@$(pip) install toil==3.8.0 pytest==2.8.3
+	@$(pip) install toil pytest
 
 develop: check_venv
	$(pip) install -e .$(extras)
@@ -107,11 +112,10 @@ clean_pypi:
 
 clean: clean_develop clean_sdist clean_pypi
 
-
 check_venv:
-	@$(python) -c 'import sys; sys.exit( int( not hasattr(sys, "real_prefix") ) )' \
-	|| ( echo "$(red)A virtualenv must be active.$(normal)" ; false )
-
+	@$(python) -c 'import sys; sys.exit( int( not (hasattr(sys, "real_prefix") or ( hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix ) ) ) )' \
+	|| [ ! -z "${VIRTUAL_ENV}" ] \
+	|| ( echo "$(red)A virtualenv must be active.$(normal)\n" ; false )
 
 check_clean_working_copy:
	@echo "$(green)Checking if your working copy is clean ...$(normal)"
diff --git a/README.md b/README.md
index c7306f5a..eaf1868f 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,10 @@
-[![Stories in Ready](https://badge.waffle.io/BD2KGenomics/protect.png?label=ready&title=Ready)](https://waffle.io/BD2KGenomics/protect)
 # ProTECT
 ### **Pr**ediction **o**f **T**-Cell **E**pitopes for **C**ancer **T**herapy
 
+Adaptation of ProTECT to use Python 3.8 instead of 2.7. A complete run has been tested using fastq files from [HCC1395 WGS Exome RNA Seq Data](https://github.com/genome/gms/wiki/HCC1395-WGS-Exome-RNA-Seq-Data), with identical results in both versions of Python.
+
+The adaptation was done using 2to3 and manual bug testing. Manual changes are recorded [in changes.md](changes.md). Since s3am is python2, **ProTECT currently works with local files only**; an untested python3 version of s3am exists [here](https://github.com/Dranion/bd2k-extras/tree/main). Continuing to the original README:
+
 This repo contains the Python libraries for the Precision Immunology Pipeline developed at UCSC.
 
     src/protect/pipeline/ProTECT.py             - The python script for running the pipeline.
@@ -20,6 +23,6 @@ All docker images used in this pipeline are available at
 
 To learn how the pipeline can be run on a sample, head over to the [ProTECT Manual](
-https://github.com/BD2KGenomics/protect/blob/master/MANUAL.md)
+https://github.com/Dranion/protect/blob/master/MANUAL.md)
 
 ProTECT is currently in its infancy and is under continuous development. We would appreciate users
 sharing the level 3 data produced by ProTECT with us such that we can better train our predictive
 models.
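Most of the mechanical edits in attic/ProTECT.py and the other converted files below are standard 2to3 rewrites: `from __future__ import print_function` dropped, `urlparse` moved to `urllib.parse`, `xrange` replaced by `range`, and `dict.keys()`/`.values()`/`.items()` wrapped in `list()`. The `list()` wrapping matters because Python 3 returns live views over the dict; iterating a view while mutating the dict raises a `RuntimeError`. A minimal illustration:

```python
sample_set = {'PRTCT-01': 'params', 'PRTCT-02': 'params'}

# Python 2: sample_set.keys() returned an independent list, so launching a
# job per patient while popping entries was safe.  Python 3: keys() is a
# view over the dict, so it must be copied with list() first.
for patient_id in list(sample_set.keys()):
    sample_set.pop(patient_id)
```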
diff --git a/attic/ProTECT.py b/attic/ProTECT.py
index d8ae8690..a3953145 100644
--- a/attic/ProTECT.py
+++ b/attic/ProTECT.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 Arjun Arkal Rao
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +20,7 @@ Program info can be found in the docstring of the main function.
 Details can also be obtained by running the script with -h .
 """
-from __future__ import print_function
+
 
 import argparse
 import errno
@@ -33,7 +33,7 @@ import time
 from collections import defaultdict, Counter
 from multiprocessing import cpu_count
-from urlparse import urlparse
+from urllib.parse import urlparse
 
 from pysam import Samfile
 
@@ -78,7 +78,7 @@ def parse_config_file(job, config_file):
     # along with it's parameters.
     for groupname, group_params in tool_specific_param_generator(job, conf):
         if groupname == 'patient':
-            if 'patient_id' not in group_params.keys():
+            if 'patient_id' not in list(group_params.keys()):
                 raise ParameterError('A patient group is missing the patient_id flag.')
             sample_set[group_params['patient_id']] = group_params
         elif groupname == 'Universal_Options':
@@ -104,7 +104,7 @@ def parse_config_file(job, config_file):
         raise ParameterError(' The following tools have no arguments in the config file : \n' +
                              '\n'.join(missing_tools))
     # Start a job for each sample in the sample set
-    for patient_id in sample_set.keys():
+    for patient_id in list(sample_set.keys()):
         job.addFollowOnJobFn(pipeline_launchpad, sample_set[patient_id], univ_options,
                              tool_options)
     return None
 
@@ -248,7 +248,7 @@ def delete_fastqs(job, fastqs):
                 +- 'normal_dna': [ , ]
     """
     for fq_type in ['tumor_rna', 'tumor_dna', 'normal_dna']:
-        for i in xrange(0,2):
+        for i in range(0,2):
             job.fileStore.deleteGlobalFile(fastqs[fq_type][i])
     return None
 
@@ -727,7 +727,7 @@ def spawn_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options
             'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai']}
     # Make a dict object to hold the return values for each of the chromosome jobs. Then run radia
     # on each chromosome.
- chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_radia = defaultdict() for chrom in chromosomes: perchrom_radia[chrom] = job.addChildJobFn(run_radia, bams, univ_options, radia_options, @@ -755,11 +755,11 @@ def merge_radia(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'radia_calls.vcf']), 'w') as radfile, \ open('/'.join([work_dir, 'radia_filter_passing_calls.vcf']), 'w') as radpassfile: for chrom in chromosomes: @@ -961,7 +961,7 @@ def spawn_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options): job.fileStore.logToMaster('Running spawn_mutect on %s' % univ_options['patient']) # Make a dict object to hold the return values for each of the chromosome # jobs. Then run mutect on each chromosome. - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_mutect = defaultdict() for chrom in chromosomes: perchrom_mutect[chrom] = job.addChildJobFn(run_mutect, tumor_bam, normal_bam, univ_options, @@ -987,10 +987,10 @@ def merge_mutect(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'mutect_calls.vcf']), 'w') as mutvcf, \ open('/'.join([work_dir, 'mutect_calls.out']), 'w') as mutout, \ open('/'.join([work_dir, 'mutect_passing_calls.vcf']), 'w') as mutpassvcf: @@ -1139,7 +1139,7 @@ def run_mutation_aggregator(job, fusion_output, radia_output, mutect_output, ind input_files.pop('fusion.vcf') # read files into memory vcf_file = defaultdict() - mutcallers = input_files.keys() + mutcallers = list(input_files.keys()) with open(''.join([work_dir, '/', univ_options['patient'], '_merged_mutations.vcf']), 'w') as merged_mut_file: for mut_caller in mutcallers: @@ -1571,8 +1571,8 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): mhci_files = get_files_from_filestore(job, mhci_preds, work_dir) # First split mhcii_preds into prediction files and predictors and maintain keys so we can later # reference them in pairs - mhcii_predictors = {x: y[1] for x, y in mhcii_preds.items()} - mhcii_files = {x: y[0] for x, y in mhcii_preds.items()} + 
mhcii_predictors = {x: y[1] for x, y in list(mhcii_preds.items())} + mhcii_files = {x: y[0] for x, y in list(mhcii_preds.items())} mhcii_files = get_files_from_filestore(job, mhcii_files, work_dir) # Get peptide files pept_files = get_files_from_filestore(job, pept_files, work_dir) @@ -1584,7 +1584,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): pepmap = json.load(mapfile) # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhci_merged_files.list']), 'w') as mhci_resfile: - for mhcifile in mhci_files.values(): + for mhcifile in list(mhci_files.values()): with open(mhcifile, 'r') as mf: for line in mf: # Skip header lines @@ -1605,7 +1605,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhcii_merged_files.list']), 'w') as \ mhcii_resfile: - for mhciifile in mhcii_files.keys(): + for mhciifile in list(mhcii_files.keys()): core_col = None # Variable to hold the column number with the core if mhcii_predictors[mhciifile] == 'Consensus': with open(mhcii_files[mhciifile], 'r') as mf: @@ -1814,7 +1814,7 @@ def prepare_samples(job, fastqs, univ_options): 'normal_dna_fastq_prefix'} if set(fastqs.keys()).difference(allowed_samples) != {'patient_id'}: raise ParameterError('Sample with the following parameters has an error:\n' + - '\n'.join(fastqs.values())) + '\n'.join(list(fastqs.values()))) # For each sample type, check if the prefix is an S3 link or a regular file # Download S3 files. for sample_type in ['tumor_dna', 'tumor_rna', 'normal_dna']: @@ -1877,7 +1877,7 @@ def get_files_from_filestore(job, files, work_dir, cache=True, docker=False): work_dir is the location where the file should be stored cache indiciates whether caching should be used """ - for name in files.keys(): + for name in list(files.keys()): outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]), cache=cache) # If the file pointed to a tarball, extract it to WORK_DIR if tarfile.is_tarfile(outfile) and file_xext(outfile).startswith('.tar'): @@ -1924,15 +1924,15 @@ def most_probable_alleles(allele_list): except KeyError: all_alleles[allele] = [float(pvalue)] # If there are less than 2 alleles, report all - if len(all_alleles.keys()) <= 2: - return all_alleles.keys() + if len(list(all_alleles.keys())) <= 2: + return list(all_alleles.keys()) # Else, get the two with most evidence. Evidence is gauged by # a) How many files (of the 3) thought that Allele was present # b) In a tie, who has a lower avg p value # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is # a measure of the avg because avg = sum / n and n is equal in both of them. else: - return sorted(all_alleles.keys(), key=lambda x: \ + return sorted(list(all_alleles.keys()), key=lambda x: \ (-len(all_alleles[x]), sum(all_alleles[x])))[0:2] @@ -2111,7 +2111,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile): """ allele, pept, pred, core = neoepitope_info - peptide_names = [x for x, y in peptides.items() if pept in y] + peptide_names = [x for x, y in list(peptides.items()) if pept in y] # For each peptide, append the ensembl gene for peptide_name in peptide_names: print(allele, pept, peptide_name, core, '0', pred, pepmap[peptide_name], sep='\t', @@ -2514,7 +2514,7 @@ def strip_xext(filepath): :return str filepath: Path to the file with the compression extension stripped off. 
""" ext_size = len(file_xext(filepath).split('.')) - 1 - for i in xrange(0, ext_size): + for i in range(0, ext_size): filepath = os.path.splitext(filepath)[0] return filepath diff --git a/attic/ProTECT_large.py b/attic/ProTECT_large.py index 40a7a200..485ea322 100644 --- a/attic/ProTECT_large.py +++ b/attic/ProTECT_large.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 Arjun Arkal Rao # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,7 +20,7 @@ Program info can be found in the docstring of the main function. Details can also be obtained by running the script with -h . """ -from __future__ import print_function + import argparse import base64 @@ -79,7 +79,7 @@ def parse_config_file(job, config_file): # along with it's parameters. for groupname, group_params in tool_specific_param_generator(job, conf): if groupname == 'patient': - if 'patient_id' not in group_params.keys(): + if 'patient_id' not in list(group_params.keys()): raise ParameterError('A patient group is missing the patient_id flag.') sample_set[group_params['patient_id']] = group_params elif groupname == 'Universal_Options': @@ -101,7 +101,7 @@ def parse_config_file(job, config_file): raise ParameterError(' The following tools have no arguments in the config file : \n' + '\n'.join(missing_tools)) # Start a job for each sample in the sample set - for patient_id in sample_set.keys(): + for patient_id in list(sample_set.keys()): job.addFollowOnJobFn(pipeline_launchpad, sample_set[patient_id], univ_options, tool_options) return None @@ -304,7 +304,7 @@ def delete_fastqs(job, fastqs): +- 'normal_dna': [ , ] """ for fq_type in ['tumor_rna', 'tumor_dna', 'normal_dna']: - for i in xrange(0,2): + for i in range(0,2): job.fileStore.deleteGlobalFile(fastqs[fq_type][i]) return None @@ -708,7 +708,7 @@ def spawn_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options 'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai']} # Make a dict object to hold the return values for each of the chromosome jobs. Then run radia # on each chromosome. 
- chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_radia = defaultdict() for chrom in chromosomes: perchrom_radia[chrom] = job.addChildJobFn(run_radia, bams, univ_options, radia_options, @@ -736,11 +736,11 @@ def merge_radia(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'radia_calls.vcf']), 'w') as radfile, \ open('/'.join([work_dir, 'radia_filter_passing_calls.vcf']), 'w') as radpassfile: for chrom in chromosomes: @@ -942,7 +942,7 @@ def spawn_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options): job.fileStore.logToMaster('Running spawn_mutect on %s' % univ_options['patient']) # Make a dict object to hold the return values for each of the chromosome # jobs. Then run mutect on each chromosome. - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_mutect = defaultdict() for chrom in chromosomes: perchrom_mutect[chrom] = job.addChildJobFn(run_mutect, tumor_bam, normal_bam, univ_options, @@ -968,10 +968,10 @@ def merge_mutect(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'mutect_calls.vcf']), 'w') as mutvcf, \ open('/'.join([work_dir, 'mutect_calls.out']), 'w') as mutout, \ open('/'.join([work_dir, 'mutect_passing_calls.vcf']), 'w') as mutpassvcf: @@ -1120,7 +1120,7 @@ def run_mutation_aggregator(job, fusion_output, radia_output, mutect_output, ind input_files.pop('fusion.vcf') # read files into memory vcf_file = defaultdict() - mutcallers = input_files.keys() + mutcallers = list(input_files.keys()) with open(''.join([work_dir, '/', univ_options['patient'], '_merged_mutations.vcf']), 'w') as merged_mut_file: for mut_caller in mutcallers: @@ -1566,8 +1566,8 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): mhci_files = get_files_from_filestore(job, mhci_preds, work_dir) # First split mhcii_preds into prediction files and predictors and maintain keys so we can later # reference them in pairs - mhcii_predictors = {x: y[1] for x, y in mhcii_preds.items()} - mhcii_files = {x: y[0] for x, y in mhcii_preds.items()} + 
mhcii_predictors = {x: y[1] for x, y in list(mhcii_preds.items())} + mhcii_files = {x: y[0] for x, y in list(mhcii_preds.items())} mhcii_files = get_files_from_filestore(job, mhcii_files, work_dir) # Get peptide files pept_files = get_files_from_filestore(job, pept_files, work_dir) @@ -1579,7 +1579,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): pepmap = json.load(mapfile) # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhci_merged_files.list']), 'w') as mhci_resfile: - for mhcifile in mhci_files.values(): + for mhcifile in list(mhci_files.values()): with open(mhcifile, 'r') as mf: for line in mf: # Skip header lines @@ -1600,7 +1600,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhcii_merged_files.list']), 'w') as \ mhcii_resfile: - for mhciifile in mhcii_files.keys(): + for mhciifile in list(mhcii_files.keys()): core_col = None # Variable to hold the column number with the core if mhcii_predictors[mhciifile] == 'Consensus': with open(mhcii_files[mhciifile], 'r') as mf: @@ -1807,7 +1807,7 @@ def prepare_samples(job, fastqs, univ_options): 'normal_dna_fastq_prefix'} if set(fastqs.keys()).difference(allowed_samples) != {'patient_id'}: raise ParameterError('Sample with the following parameters has an error:\n' + - '\n'.join(fastqs.values())) + '\n'.join(list(fastqs.values()))) # For each sample type, check if the prefix is an S3 link or a regular file # Download S3 files. for sample_type in ['tumor_dna', 'tumor_rna', 'normal_dna']: @@ -1869,7 +1869,7 @@ def get_files_from_filestore(job, files, work_dir, cache=True, docker=False): work_dir is the location where the file should be stored cache indiciates whether caching should be used """ - for name in files.keys(): + for name in list(files.keys()): outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]), cache=cache) # If the file pointed to a tarball, extract it to WORK_DIR if tarfile.is_tarfile(outfile) and file_xext(outfile).startswith('.tar'): @@ -1916,15 +1916,15 @@ def most_probable_alleles(allele_list): except KeyError: all_alleles[allele] = [float(pvalue)] # If there are less than 2 alleles, report all - if len(all_alleles.keys()) <= 2: - return all_alleles.keys() + if len(list(all_alleles.keys())) <= 2: + return list(all_alleles.keys()) # Else, get the two with most evidence. Evidence is gauged by # a) How many files (of the 3) thought that Allele was present # b) In a tie, who has a lower avg p value # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is # a measure of the avg because avg = sum / n and n is equal in both of them. else: - return sorted(all_alleles.keys(), key=lambda x: \ + return sorted(list(all_alleles.keys()), key=lambda x: \ (-len(all_alleles[x]), sum(all_alleles[x])))[0:2] @@ -2103,7 +2103,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile): """ allele, pept, pred, core = neoepitope_info - peptide_names = [x for x, y in peptides.items() if pept in y] + peptide_names = [x for x, y in list(peptides.items()) if pept in y] # For each peptide, append the ensembl gene for peptide_name in peptide_names: print(allele, pept, peptide_name, core, '0', pred, pepmap[peptide_name], sep='\t', @@ -2446,7 +2446,7 @@ def strip_xext(filepath): :return str filepath: Path to the file with the compression extension stripped off. 
""" ext_size = len(file_xext(filepath).split('.')) - 1 - for i in xrange(0, ext_size): + for i in range(0, ext_size): filepath = os.path.splitext(filepath)[0] return filepath diff --git a/attic/encrypt_files_in_dir_to_s3.py b/attic/encrypt_files_in_dir_to_s3.py index 67fac013..69bd0360 100644 --- a/attic/encrypt_files_in_dir_to_s3.py +++ b/attic/encrypt_files_in_dir_to_s3.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright (C) 2016 UCSC Computational Genomics Lab # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,7 +24,7 @@ Move files in a directory, or entire directory structures to S3 with (or without) encryption. """ -from __future__ import print_function + import argparse import base64 import hashlib diff --git a/attic/precision_immuno.py b/attic/precision_immuno.py index 73963b43..d958b8c2 100644 --- a/attic/precision_immuno.py +++ b/attic/precision_immuno.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 Arjun Arkal Rao # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,7 +20,7 @@ Program info can be found in the docstring of the main function. Details can also be obtained by running the script with -h . """ -from __future__ import print_function + import argparse import base64 @@ -77,7 +77,7 @@ def parse_config_file(job, config_file): # along with it's parameters. for groupname, group_params in tool_specific_param_generator(job, conf): if groupname == 'patient': - if 'patient_id' not in group_params.keys(): + if 'patient_id' not in list(group_params.keys()): raise ParameterError('A patient group is missing the patient_id flag.') sample_set[group_params['patient_id']] = group_params elif groupname == 'Universal_Options': @@ -99,7 +99,7 @@ def parse_config_file(job, config_file): raise ParameterError(' The following tools have no arguments in the config file : \n' + '\n'.join(missing_tools)) # Start a job for each sample in the sample set - for patient_id in sample_set.keys(): + for patient_id in list(sample_set.keys()): job.addFollowOnJobFn(pipeline_launchpad, sample_set[patient_id], univ_options, tool_options) return None @@ -296,7 +296,7 @@ def delete_fastqs(job, fastqs): +- 'normal_dna': [ , ] """ for fq_type in ['tumor_rna', 'tumor_dna', 'normal_dna']: - for i in xrange(0,2): + for i in range(0,2): job.fileStore.deleteGlobalFile(fastqs[fq_type][i]) return None @@ -685,7 +685,7 @@ def spawn_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options 'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai']} # Make a dict object to hold the return values for each of the chromosome jobs. Then run radia # on each chromosome. 
- chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_radia = defaultdict() for chrom in chromosomes: perchrom_radia[chrom] = job.addChildJobFn(run_radia, bams, univ_options, radia_options, @@ -710,11 +710,11 @@ def merge_radia(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'radia_calls.vcf']), 'w') as radfile, \ open('/'.join([work_dir, 'radia_filter_passing_calls.vcf']), 'w') as radpassfile: for chrom in chromosomes: @@ -909,7 +909,7 @@ def spawn_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options): job.fileStore.logToMaster('Running spawn_mutect on %s' % univ_options['patient']) # Make a dict object to hold the return values for each of the chromosome # jobs. Then run mutect on each chromosome. - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_mutect = defaultdict() for chrom in chromosomes: perchrom_mutect[chrom] = job.addChildJobFn(run_mutect, tumor_bam, normal_bam, univ_options, @@ -932,10 +932,10 @@ def merge_mutect(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'mutect_calls.vcf']), 'w') as mutvcf, \ open('/'.join([work_dir, 'mutect_calls.out']), 'w') as mutout, \ open('/'.join([work_dir, 'mutect_passing_calls.vcf']), 'w') as mutpassvcf: @@ -1076,7 +1076,7 @@ def run_mutation_aggregator(job, fusion_output, radia_output, mutect_output, ind input_files.pop('fusion.vcf') # read files into memory vcf_file = defaultdict() - mutcallers = input_files.keys() + mutcallers = list(input_files.keys()) with open(''.join([work_dir, '/', univ_options['patient'], '_merged_mutations.vcf']), 'w') as merged_mut_file: for mut_caller in mutcallers: @@ -1502,8 +1502,8 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): mhci_files = get_files_from_filestore(job, mhci_preds, work_dir) # First split mhcii_preds into prediction files and predictors and maintain keys so we can later # reference them in pairs - mhcii_predictors = {x: y[1] for x, y in mhcii_preds.items()} - mhcii_files = {x: y[0] for x, y in mhcii_preds.items()} + 
mhcii_predictors = {x: y[1] for x, y in list(mhcii_preds.items())} + mhcii_files = {x: y[0] for x, y in list(mhcii_preds.items())} mhcii_files = get_files_from_filestore(job, mhcii_files, work_dir) # Get peptide files pept_files = get_files_from_filestore(job, pept_files, work_dir) @@ -1515,7 +1515,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): pepmap = json.load(mapfile) # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhci_merged_files.list']), 'w') as mhci_resfile: - for mhcifile in mhci_files.values(): + for mhcifile in list(mhci_files.values()): with open(mhcifile, 'r') as mf: for line in mf: # Skip header lines @@ -1536,7 +1536,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhcii_merged_files.list']), 'w') as \ mhcii_resfile: - for mhciifile in mhcii_files.keys(): + for mhciifile in list(mhcii_files.keys()): core_col = None # Variable to hold the column number with the core if mhcii_predictors[mhciifile] == 'Consensus': with open(mhcii_files[mhciifile], 'r') as mf: @@ -1740,7 +1740,7 @@ def prepare_samples(job, fastqs, univ_options): 'normal_dna_fastq_prefix'} if set(fastqs.keys()).difference(allowed_samples) != {'patient_id'}: raise ParameterError('Sample with the following parameters has an error:\n' + - '\n'.join(fastqs.values())) + '\n'.join(list(fastqs.values()))) # For each sample type, check if the prefix is an S3 link or a regular file # Download S3 files. for sample_type in ['tumor_dna', 'tumor_rna', 'normal_dna']: @@ -1800,7 +1800,7 @@ def get_files_from_filestore(job, files, work_dir, cache=True, docker=False): work_dir is the location where the file should be stored cache indiciates whether caching should be used """ - for name in files.keys(): + for name in list(files.keys()): outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]), cache=cache) # If the file pointed to a tarball, extract it to WORK_DIR if tarfile.is_tarfile(outfile) and file_xext(outfile).startswith('.tar'): @@ -1847,15 +1847,15 @@ def most_probable_alleles(allele_list): except KeyError: all_alleles[allele] = [float(pvalue)] # If there are less than 2 alleles, report all - if len(all_alleles.keys()) <= 2: - return all_alleles.keys() + if len(list(all_alleles.keys())) <= 2: + return list(all_alleles.keys()) # Else, get the two with most evidence. Evidence is gauged by # a) How many files (of the 3) thought that Allele was present # b) In a tie, who has a lower avg p value # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is # a measure of the avg because avg = sum / n and n is equal in both of them. 
     else:
-        return sorted(all_alleles.keys(), key=lambda x: \
+        return sorted(list(all_alleles.keys()), key=lambda x: \
             (-len(all_alleles[x]), sum(all_alleles[x])))[0:2]
 
 
@@ -2031,7 +2031,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile):
            'ensembl_gene\thugo_gene\tcomma_sep_transcript_mutations'
     """
     allele, pept, pred, core = neoepitope_info
-    peptide_names = [x for x, y in peptides.items() if pept in y]
+    peptide_names = [x for x, y in list(peptides.items()) if pept in y]
     # For each peptide, append the ensembl gene
     for peptide_name in peptide_names:
         print(allele, pept, peptide_name, core, '0', pred, pepmap[peptide_name], sep='\t',
@@ -2368,7 +2368,7 @@ def strip_xext(filepath):
     :return str filepath: Path to the file with the compression extension stripped off.
     """
     ext_size = len(file_xext(filepath).split('.')) - 1
-    for i in xrange(0, ext_size):
+    for i in range(0, ext_size):
         filepath = os.path.splitext(filepath)[0]
     return filepath
 
diff --git a/changes.md b/changes.md
new file mode 100644
index 00000000..1d7fa69c
--- /dev/null
+++ b/changes.md
@@ -0,0 +1,22 @@
+# Manual Changes to ProTECT
+:star: indicates changes to the algorithm that *could* potentially change results, though best efforts were made for a 1:1 conversion
+
+:black_square_button: indicates changes I hope to reverse; they are only 'temp fixes'
+
+- Originally ran 2to3 in commit a5d062fab68f8bbbebc2bbe9f4192b47b451146e
+  - removed explicit versioning in the [Makefile](https://github.com/BD2KGenomics/protect/commit/a5d062fab68f8bbbebc2bbe9f4192b47b451146e#diff-76ed074a9305c04054cdebb9e9aad2d818052b07091de1f20cad0bbac34ffb52) since the py3 version is still in dev
+- :black_square_button: [removed version checks in setup.py](https://github.com/BD2KGenomics/protect/commit/f04f22fb9f50270e5c0307d4a64aca0f3f7022d3) and obsolete setuptools
+  - [along with the setup version](https://github.com/BD2KGenomics/protect/commit/f70d3196198a2530406906b8af5a55b848aa0b14)
+- [changed default references](https://github.com/BD2KGenomics/protect/commit/c2fe3a8b8223682e6d63cccb4fccf0787227c525) from s3://cgl-pipeline-inputs to s3://protect-data
+  - this S3 bucket is pay-to-access; however, s3am is currently untested and was only converted automatically
+- :star::star: [common.py chromosome sorting](https://github.com/BD2KGenomics/protect/commit/b5ca956f3dfe05bf6714be8135cc90fe48140d98)
+- [docker image decodes to utf-8](https://github.com/BD2KGenomics/protect/commit/1d2bdb941548bdf4703113140d1f0758791bf88a)
+- [IOBase check rather than file](https://github.com/BD2KGenomics/protect/commit/351e855184ae218242a42e1aaa5781d22aba0511)
+- [some binary vs string adaptations](https://github.com/BD2KGenomics/protect/commit/5a4c50d1d2b8c71f3bc2f512f3679e80368044be#diff-e46b0e6e9cc33d9130334ab6994c9684b0972aaca58c889b6c1f4819751f1c79)
+- :star: [changed the obsolete ix to loc](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-3347ae223ced4e929cf7f273bf839bdeb219d82681f8e66a951d85cbeb079685)
+- :black_square_button: quick fix: ran into a problem with an '80.0' default for cores; I can't figure out where the default is coming from, so the following manual changes were made:
+  - [phlat cores to 10](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-7e85a3e4e9c911fded129ff48b2dd983d800c5190412f641eee85ff23ed9295c)
+  - [rsem cores to 20](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-1615337ffdbffe39413f26e4ccbb5309ed10b61987559df23ce6fc57cb5dd86a)
+  - [star fusion cores to 20](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-60e2cfd2feabfe71442d69d1d0d44ff293f8fe1e12aa74c3fe52101d5b32e60e)
+- [string.maketrans is obsolete; str.maketrans is better](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-60e2cfd2feabfe71442d69d1d0d44ff293f8fe1e12aa74c3fe52101d5b32e60eR300)
+- changed the gunzip file write to use a library (faster?)
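On the `string.maketrans` bullet above: Python 3 removed the `string`-module function in favor of the `str.maketrans` static method. A small sketch of the replacement pattern (reverse-complementing a sequence is used here purely as an illustration, not necessarily ProTECT's exact call site):

```python
# Python 2:
#     from string import maketrans
#     table = maketrans('ACGT', 'TGCA')
table = str.maketrans('ACGT', 'TGCA')

print('GATTACA'.translate(table)[::-1])  # prints TGTAATC
```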
diff --git a/docker/pipelineWrapper.py b/docker/pipelineWrapper.py
index d767e41b..12502809 100644
--- a/docker/pipelineWrapper.py
+++ b/docker/pipelineWrapper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 
 import tarfile
 import argparse
@@ -103,7 +103,7 @@ def getName(fileName):
     # move individual files out
     for fileName in consistentNaming:
         shutil.copyfile(getName(fileName), os.path.join(output_dir, os.path.basename(fileName)))
-    for src, dst in renamingNeeded.iteritems():
+    for src, dst in renamingNeeded.items():
         if dst.endswith('.tar'):
             make_tar(getName(src), os.path.join(output_dir, dst))
         else:
diff --git a/docker/wrapper.py b/docker/wrapper.py
index c142f387..0d08c223 100644
--- a/docker/wrapper.py
+++ b/docker/wrapper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function + from pipelineWrapper import PipelineWrapperBuilder import logging import os @@ -437,20 +437,20 @@ def str2bool(v): help='Tabix index for dbsnp.gz.') parser.add_argument('--mhc-pathways-file', type=str, - default="S3://cgl-pipeline-inputs/protect/ci_references/" + default="S3://protect-data/hg38_references/" "mhc_pathways.tsv.tar.gz", help='JSON file containing the various genes in the MHC pathway' 'and their mean TPM expressions across samples in a background set.') parser.add_argument('--itx-resistance-file', type=str, - default="S3://cgl-pipeline-inputs/protect/ci_references/" + default="S3://protect-data/hg38_references/" "itx_resistance.tsv.tar.gz", help='') parser.add_argument('--immune-resistance-pathways-file', type=str, - default="S3://cgl-pipeline-inputs/protect/ci_references/" + default="S3://protect-data/hg38_references/" "immune_resistance_pathways.json.tar.gz", help='') parser.add_argument('--car-t-targets-file', type=str, - default="S3://cgl-pipeline-inputs/protect/ci_references/" + default="S3://protect-data/hg38_references/" "car_t_targets.tsv.tar.gzz", help='') diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 96cb4ebe..03c4a4b0 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -14,25 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License. from pkg_resources import parse_version -try: - from pkg_resources import SetuptoolsLegacyVersion as _LegacyVersion -except ImportError as e: - if 'SetuptoolsLegacyVersion' in e.message: - from packaging.version import LegacyVersion as _LegacyVersion - else: - raise +#try: +# from pkg_resources import SetuptoolsLegacyVersion as _LegacyVersion +#except ImportError as e: +# if 'SetuptoolsLegacyVersion' in e.message: +# from packaging.version import LegacyVersion as _LegacyVersion +# else: +# raise from setuptools import find_packages, setup from setuptools.command.test import test as TestCommand -from version import version +#from version import version import errno import subprocess import sys - -toil_version = '3.8.0' -s3am_version = '2.0.1' -gdc_version = 'v1.1.0' +#outdated for python3 +#toil_version = '3.8.0' +#s3am_version = '2.0.1' +#gdc_version = 'v1.1.0' def check_tool_version(tool, required_version, blacklisted_versions=None, binary=False): @@ -66,9 +66,9 @@ def check_tool_version(tool, required_version, blacklisted_versions=None, binary raise RuntimeError('Does %s have a version.py?' % tool) if type(parse_version(installed_version)) == _LegacyVersion: - print('Detecting that the installed version of "%s"(%s) is probably based off a git commit ' + print(('Detecting that the installed version of "%s"(%s) is probably based off a git commit ' 'and assuming this build is for testing purposes. If this is not the case, please ' - 'try again with a valid version of "%s".' % (tool, installed_version, tool)) + 'try again with a valid version of "%s".' 
% (tool, installed_version, tool))) elif parse_version(installed_version) < parse_version(required_version): raise RuntimeError('%s was detected to be version (%s) but ProTECT requires (%s)' % (tool, installed_version, required_version)) @@ -79,11 +79,11 @@ def check_tool_version(tool, required_version, blacklisted_versions=None, binary # Check Toil version -check_tool_version('toil', toil_version, binary=True) +#check_tool_version('toil', toil_version, binary=True) # Check S3am version -check_tool_version('s3am', s3am_version, binary=True) +#check_tool_version('s3am', s3am_version, binary=True) # Check gdc-client version -check_tool_version('gdc-client', gdc_version, binary=True, blacklisted_versions=['v1.2.0']) +#check_tool_version('gdc-client', gdc_version, binary=True, blacklisted_versions=['v1.2.0']) # Set up a test class @@ -109,7 +109,6 @@ def run_tests(self): setup(name='protect', - version=version, description='Prediction of T-Cell Epitopes for Cancer Therapy', url='http://github.com/BD2KGenomics/protect', author='Arjun Arkal Rao', @@ -117,10 +116,10 @@ def run_tests(self): license='Apache', install_requires=[ 'PyYAML', - 'pandas==0.19.2' + 'pandas' ], tests_require=[ - 'pytest==2.8.3'], + 'pytest'], test_suite='protect', entry_points={ 'console_scripts': [ diff --git a/src/__init__.py b/src/__init__.py index ba9b9130..dabfb0af 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import + diff --git a/src/protect/ProTECT_config.yaml b/src/protect/ProTECT_config.yaml new file mode 100644 index 00000000..239a42d0 --- /dev/null +++ b/src/protect/ProTECT_config.yaml @@ -0,0 +1,179 @@ +## Copyright 2016 UCSC Computational Genomics Lab +## Original contributor: Arjun Arkal Rao +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +#################################################################################################### +#################################################################################################### +## This is the input parameter file for the precision immuno pipeline. The parameters for each of +## the tools is provided here. The file is written in the YAML format. A nice description of the +## format can be found at http://docs.ansible.com/ansible/YAMLSyntax.html +## +## You can add comments anywhere in this file by prefixing it with a '#' +## +## Unless otherwise mentioned, all fields must be filled. 
+##
+####################################################################################################
+####################################################################################################
+
+# Any number of patients/samples can be listed here
+patients:
+    # Each group starts with the patient ID
+    PRTCT-01:
+        # The paths should point to the forward read of the pair. The pipeline assumes that the
+        # forward and reverse read files have the same prefix
+        tumor_dna_fastq_1: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_D1VCPACXX_1_R1.fastq
+        tumor_dna_fastq_2: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_D1VCPACXX_1_R2.fastq
+        normal_dna_fastq_1: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_D1VCPACXX_6_R1.fastq
+        normal_dna_fastq_2: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_D1VCPACXX_6_R2.fastq
+        tumor_rna_fastq_1: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_C1TD1ACXX_8_ACAGTG_R1.fastq
+        tumor_rna_fastq_2: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_C1TD1ACXX_8_ACAGTG_R2.fastq
+        tumor_type: SKCM
+        # ssec_encrypted: False
+        # filter_for_OxoG: False
+    # PRTCT-02:
+        # The paths can also be to directories on S3 as
+        #tumor_dna_fastq_1: S3://bucket/path/to/1.fastq.gz
+        #normal_dna_fastq_1: S3://bucket/path/to/1.fastq.gz
+        #tumor_rna_fastq_1: https://S3-.awsamazon.com/bucket/path/to/1.fastq.gz
+
+# These are options that are used by most tools
+Universal_Options:
+    dockerhub: aarjunrao
+    java_Xmx: 20G
+    reference_build: hg38 # Acceptable options are hg19, hg38, GRCh37, GRCh38
+    # sse_key: /path/to/master.key # Path to the AWS master key. Required if using AWS else optional
+    # sse_key_is_master: True # True or False. Required if using AWS else optional
+    # gdc_download_token: /path/to/token.txt # Path to the user's GDC download token.
+    storage_location: Local # Local or aws: for where the output must go
+    #storage_location: aws:protect-run-xyz
+    output_folder: /home/dranion/Flashdrive/project-results # Path to where the output must go.
+    #mail_to: test.email@host.com # Email for sending success report.
+
+
+# These options are for each module. You probably don't need to change any of this!
+alignment: + cutadapt: + a: AGATCGGAAGAG + A: AGATCGGAAGAG + # version: 1.9.1 + star: + type: star # use starlong if your reads are > 150bp + index: /home/dranion/Flashdrive/protect-data/star_with_fusion_100bp_readlen_indexes.tar.gz # Use star_without if you set star_fusion = False + # version: 2.5.2b + bwa: + index: /home/dranion/Flashdrive/protect-data/bwa_index.tar.gz + # version: 0.7.9a + post: + samtools: + # version: 1.2 + picard: + # version: 1.135 + +expression_estimation: + rsem: + index: /home/dranion/Flashdrive/protect-data/rsem_index.tar.gz + # version: 1.2.0 + +mutation_calling: + indexes: + chromosomes: canonical_chr, chrM + genome_fasta: /home/dranion/Flashdrive/protect-data/hg38.fa.tar.gz + genome_fai: /home/dranion/Flashdrive/protect-data/hg38.fa.fai.tar.gz + genome_dict: /home/dranion/Flashdrive/protect-data/hg38.dict.tar.gz + cosmic_vcf: /home/dranion/Flashdrive/protect-data/CosmicCodingMuts.vcf.tar.gz + cosmic_idx: /home/dranion/Flashdrive/protect-data/CosmicCodingMuts.vcf.idx.tar.gz + dbsnp_vcf: /home/dranion/Flashdrive/protect-data/dbsnp_coding.vcf.gz + dbsnp_idx: /home/dranion/Flashdrive/protect-data/dbsnp_coding.vcf.idx.tar.gz + dbsnp_tbi: /home/dranion/Flashdrive/protect-data/dbsnp_coding.vcf.gz.tbi + mutect: + java_Xmx: 2G + # version: 1.1.7 + muse: + # version: 1.0rc_submission_b391201 + radia: + cosmic_beds: /home/dranion/Flashdrive/protect-data/radia_cosmic.tar.gz + dbsnp_beds: /home/dranion/Flashdrive/protect-data/radia_dbsnp.tar.gz + retrogene_beds: /home/dranion/Flashdrive/protect-data/radia_retrogenes.tar.gz + pseudogene_beds: /home/dranion/Flashdrive/protect-data/radia_pseudogenes.tar.gz + gencode_beds: /home/dranion/Flashdrive/protect-data/radia_gencode.tar.gz + # version: 398366ef07b5911d8082ed61cbf03d487a41f286 + somaticsniper: + # version: 1.0.4 + samtools: + # version: 0.1.8 + bam_readcount: + # version: 0.7.4 + star_fusion: + #run: True + #version: 1.0.0 + fusion_inspector: + #run_trinity: True + #version: 1.0.1 + strelka: + # version: 1.0.15 + config_file: /home/dranion/Flashdrive/protect-data/strelka_bwa_WXS_config.ini.tar.gz + + +mutation_annotation: + snpeff: + index: /home/dranion/Flashdrive/protect-data/snpeff_index.tar.gz + # version: 3.6 + java_Xmx: 20G + +mutation_translation: + transgene: + gencode_peptide_fasta : /home/dranion/Flashdrive/protect-data/gencode.v25.pc_translations_NOPARY.fa.tar.gz + gencode_transcript_fasta : /home/dranion/Flashdrive/protect-data/gencode.v25.pc_transcripts_NOPARY.fa.tar.gz + gencode_annotation_gtf : /home/dranion/Flashdrive/protect-data/gencode.v25.annotation_NOPARY.gtf.tar.gz + genome_fasta : /home/dranion/Flashdrive/protect-data/hg38.fa.tar.gz + # version: 2.2.2 + +haplotyping: + phlat: + index: /home/dranion/Flashdrive/protect-data/phlat_index.tar.gz + # version: 1.0 + +mhc_peptide_binding: + mhci: + method_file: /home/dranion/Flashdrive/protect-data/mhci_restrictions.json.tar.gz + pred: IEDB_recommended + # version: 2.13 + mhcii: + method_file: /home/dranion/Flashdrive/protect-data/mhcii_restrictions.json.tar.gz + pred: IEDB_recommended + # version: 2.13 + netmhciipan: + # version: 3.1 + +prediction_ranking: + rankboost: + mhci_args: + npa: 0.0 + nph: 0.0 + nMHC: 0.32 + TPM: 0.0 + overlap: 0.68 + tndelta: 0.0 + mhcii_args: + npa: 0.2 + nph: 0.2 + nMHC: 0.2 + TPM: 0.2 + tndelta: 0.2 + # version: 2.0.3 + +reports: + mhc_pathways_file: /home/dranion/Flashdrive/protect-data/mhc_pathways.tsv.tar.gz + itx_resistance_file: /home/dranion/Flashdrive/protect-data/itx_resistance.tsv.tar.gz + 
immune_resistance_pathways_file: /home/dranion/Flashdrive/protect-data/immune_resistance_pathways.json.tar.gz + car_t_targets_file: /home/dranion/Flashdrive/protect-data/car_t_targets.tsv.tar.gz diff --git a/src/protect/__init__.py b/src/protect/__init__.py index ba9b9130..dabfb0af 100644 --- a/src/protect/__init__.py +++ b/src/protect/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import + diff --git a/src/protect/addons/__init__.py b/src/protect/addons/__init__.py index ba9b9130..dabfb0af 100644 --- a/src/protect/addons/__init__.py +++ b/src/protect/addons/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import + diff --git a/src/protect/addons/assess_car_t_validity.py b/src/protect/addons/assess_car_t_validity.py index f28a2de4..e49a4469 100644 --- a/src/protect/addons/assess_car_t_validity.py +++ b/src/protect/addons/assess_car_t_validity.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function + from protect.addons.common import TCGAToGTEx from protect.common import export_results, get_files_from_filestore, untargz from protect.haplotyping.phlat import parse_phlat_file diff --git a/src/protect/addons/assess_immunotherapy_resistance.py b/src/protect/addons/assess_immunotherapy_resistance.py index 568aacf9..6f96530b 100644 --- a/src/protect/addons/assess_immunotherapy_resistance.py +++ b/src/protect/addons/assess_immunotherapy_resistance.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function + from protect.addons.common import TCGAToGTEx from protect.common import export_results, get_files_from_filestore, untargz diff --git a/src/protect/addons/assess_mhc_pathway.py b/src/protect/addons/assess_mhc_pathway.py index 149df4ba..5e152082 100644 --- a/src/protect/addons/assess_mhc_pathway.py +++ b/src/protect/addons/assess_mhc_pathway.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import print_function
+
 from collections import Counter
 from protect.addons.common import TCGAToGTEx
 from protect.common import export_results, get_files_from_filestore, untargz
@@ -81,7 +81,7 @@ def assess_mhc_genes(job, gene_expression, rna_haplotype, univ_options, reports_
     # Read the patient gene values into a dictionary
     gene_expressions = pd.read_table(input_files['rsem_quant.tsv'], index_col=0, header=0)
-    gene_expressions = Counter({x.split('.')[0]: y for x, y in gene_expressions['TPM'].to_dict().items()})
+    gene_expressions = Counter({x.split('.')[0]: y for x, y in list(gene_expressions['TPM'].to_dict().items())})
     # Print the report
     roles = {x for x in background_df['Roles'].values if ',' not in x}
     with open('mhc_pathway_report.txt', 'w') as mpr:
@@ -119,7 +119,9 @@ def assess_mhc_genes(job, gene_expression, rna_haplotype, univ_options, reports_
                 result, 2, result), file=mpr)
             for ensg in role_df.index:
-                ensgName = background_df.ix[ensg, 'Name']
+                # .ix was deprecated in pandas 0.20.0
+                # ensgName = background_df.ix[ensg, 'Name']
+                ensgName = background_df.loc[ensg, 'Name']
                 b_vals = {}
                 for bkg in b_types:
                     val = "{0:.2f}".format(role_df.loc[ensg].get(b_types[bkg], default='NA'))
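`DataFrame.ix` was deprecated in pandas 0.20.0 and later removed, hence the switch to label-based `.loc` above. A toy illustration (made-up frame, not pipeline data):

    import pandas as pd

    df = pd.DataFrame({'Name': ['HLA-A', 'TAP1']}, index=['ENSG01', 'ENSG02'])
    name = df.loc['ENSG01', 'Name']  # label-based lookup, replaces df.ix['ENSG01', 'Name']
    first = df.iloc[0, 0]            # positional lookup, the other .ix replacement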
diff --git a/src/protect/addons/common.py b/src/protect/addons/common.py
index 8566ae83..5dc8be99 100644
--- a/src/protect/addons/common.py
+++ b/src/protect/addons/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 
 from collections import Counter
diff --git a/src/protect/alignment/__init__.py b/src/protect/alignment/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/alignment/__init__.py
+++ b/src/protect/alignment/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/alignment/common.py b/src/protect/alignment/common.py
index b959fc37..70cb72a0 100644
--- a/src/protect/alignment/common.py
+++ b/src/protect/alignment/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
 from math import ceil
 from protect.common import docker_call, docker_path, export_results, get_files_from_filestore
diff --git a/src/protect/alignment/dna.py b/src/protect/alignment/dna.py
index 105fa3d8..767065a1 100644
--- a/src/protect/alignment/dna.py
+++ b/src/protect/alignment/dna.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from math import ceil
 from protect.alignment.common import index_bamfile, index_disk
@@ -128,7 +128,7 @@ def run_bwa(job, fastqs, sample_type, univ_options, bwa_options):
         input_files[read_file + gz] = input_files[read_file] + gz
     # Untar the index
     input_files['bwa_index'] = untargz(input_files['bwa_index.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['mem',
                   '-t', str(bwa_options['n']),
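A pattern worth calling out once, since it recurs in almost every hunk below: Python 3's `dict.keys()`, `.values()`, and `.items()` return live views rather than lists, so they cannot be indexed and raise `RuntimeError` if the dict changes size during iteration; wrapping them in `list(...)` restores the Python 2 semantics. A toy illustration (hypothetical dict, not pipeline code):

    d = {'tumor_dna': 1, 'normal_dna': 2}

    # d.keys()[0] would raise TypeError: 'dict_keys' object is not subscriptable
    first = list(d.keys())[0]

    # Materializing the view also makes it safe to mutate the dict mid-loop:
    for key in list(d.keys()):
        del d[key]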
diff --git a/src/protect/alignment/rna.py b/src/protect/alignment/rna.py
index fe8fe86b..aeb38320 100644
--- a/src/protect/alignment/rna.py
+++ b/src/protect/alignment/rna.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from math import ceil
@@ -94,7 +94,7 @@ def run_star(job, fastqs, univ_options, star_options):
     if os.path.exists(star_fusion_idx):
         input_files['star_index'] = star_fusion_idx
 
-    input_files = {key: docker_path(path, work_dir=work_dir) for key, path in input_files.items()}
+    input_files = {key: docker_path(path, work_dir=work_dir) for key, path in list(input_files.items())}
 
     # Using recommended STAR-Fusion parameters:
     # https://github.com/STAR-Fusion/STAR-Fusion/wiki
diff --git a/src/protect/binding_prediction/__init__.py b/src/protect/binding_prediction/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/binding_prediction/__init__.py
+++ b/src/protect/binding_prediction/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/binding_prediction/common.py b/src/protect/binding_prediction/common.py
index 3bead87e..2c58a9e9 100644
--- a/src/protect/binding_prediction/common.py
+++ b/src/protect/binding_prediction/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from protect.binding_prediction.mhci import predict_mhci_binding
@@ -108,7 +108,7 @@ def spawn_antigen_predictors(job, transgened_files, phlat_files, univ_options, m
             mhci_preds[(allele, peplen)] = mhci_job.addChildJobFn(
                 predict_normal_binding,
                 mhci_job.rv(),
-                {x: y for x, y in pept_files.items() if peplen in x},
+                {x: y for x, y in list(pept_files.items()) if peplen in x},
                 allele,
                 peplen,
                 univ_options,
@@ -125,7 +125,7 @@
             mhcii_preds[(allele, 15)] = mhcii_job.addFollowOnJobFn(
                 predict_normal_binding,
                 mhcii_job.rv(),
-                {x: y for x, y in pept_files.items() if '15' in x},
+                {x: y for x, y in list(pept_files.items()) if '15' in x},
                 allele,
                 '15',
                 univ_options,
@@ -145,8 +145,8 @@ def read_fastas(input_files):
     :return: The read fastas in a dictionary of tuples
     :rtype: dict
     """
-    tumor_file = [y for x, y in input_files.items() if x.startswith('T')][0]
-    normal_file = [y for x, y in input_files.items() if x.startswith('N')][0]
+    tumor_file = [y for x, y in list(input_files.items()) if x.startswith('T')][0]
+    normal_file = [y for x, y in list(input_files.items()) if x.startswith('N')][0]
     output_files = defaultdict(list)
     output_files = _read_fasta(tumor_file, output_files)
     num_entries = len(output_files)
@@ -343,7 +343,7 @@ def _get_normal_peptides(job, mhc_df, iars, peplen):
     peplen = int(peplen)
     normal_peptides = []
     for pred in mhc_df.itertuples():
-        containing_iars = [i for i, sl in iars.items() if pred.pept in sl[0]]
+        containing_iars = [i for i, sl in list(iars.items()) if pred.pept in sl[0]]
         assert len(containing_iars) != 0, "No IARS contained the peptide"
         if len(iars[containing_iars[0]]) == 1:
             # This is a fusion and has no corresponding normal
@@ -351,7 +351,7 @@
         else:
             # If there are multiple IARs, they all or none of them have to have a corresponding
             # normal.
-            if len(set([len(y) for x, y in iars.items() if x in containing_iars])) != 1:
+            if len(set([len(y) for x, y in list(iars.items()) if x in containing_iars])) != 1:
                 job.fileStore.logToMaster('Some IARS were found to contain the substring but were'
                                           'inconsistent with the presence of a corresponding '
                                           'normal.')
@@ -594,7 +594,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile, netmhc=False):
     if netmhc:
         peptide_names = [neoepitope_info.peptide_name]
     else:
-        peptide_names = [x for x, y in peptides.items() if neoepitope_info.pept in y]
+        peptide_names = [x for x, y in list(peptides.items()) if neoepitope_info.pept in y]
     # Convert named tuple to dict so it can be modified
     neoepitope_info = neoepitope_info._asdict()
     # Handle fusion peptides (They are characterized by having all N's as the normal partner)
diff --git a/src/protect/binding_prediction/mhci.py b/src/protect/binding_prediction/mhci.py
index 2708771d..f5679fbd 100644
--- a/src/protect/binding_prediction/mhci.py
+++ b/src/protect/binding_prediction/mhci.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from protect.common import docker_call, get_files_from_filestore, read_peptide_file
diff --git a/src/protect/binding_prediction/mhcii.py b/src/protect/binding_prediction/mhcii.py
index 3e6f93a0..91db1f62 100644
--- a/src/protect/binding_prediction/mhcii.py
+++ b/src/protect/binding_prediction/mhcii.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from protect.common import docker_call, get_files_from_filestore, read_peptide_file
diff --git a/src/protect/common.py b/src/protect/common.py
old mode 100644
new mode 100755
index 17d72b5e..467ca016
--- a/src/protect/common.py
+++ b/src/protect/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -21,16 +21,17 @@
 Program info can be found in the docstring of the main function.
 Details can also be obtained by running the script with -h .
 """
-from __future__ import print_function
+
 from collections import defaultdict
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from urlparse import urlparse
-
+from urllib.parse import urlparse
+from io import IOBase
 import errno
 import gzip
 import logging
+import shutil
 import os
 import re
 import smtplib
@@ -38,7 +39,7 @@
 import subprocess
 import sys
 import tarfile
-import urllib2
+import urllib.request, urllib.error, urllib.parse
 import uuid
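`urlparse` and `urllib2` do not exist in Python 3; their contents moved into `urllib.parse`, `urllib.request`, and `urllib.error`, as the import hunk above shows. A minimal sketch of the equivalent calls (the URL is a placeholder):

    from urllib.error import HTTPError
    from urllib.parse import urlparse
    from urllib.request import urlopen

    url = 'https://example.com/references/hg38.fa.tar.gz'  # placeholder URL
    parsed_url = urlparse(url)  # exposes .scheme, .netloc, .path, ...
    try:
        response = urlopen(url)
    except HTTPError as err:
        print('download failed with HTTP status', err.code)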
@@ -53,7 +54,7 @@ def get_files_from_filestore(job, files, work_dir, docker=False):
     :return: Dict of files: (optionallly docker-friendly) fileepaths
     :rtype: dict
     """
-    for name in files.keys():
+    for name in list(files.keys()):
         outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]))
         # If the files will be sent to docker, we will mount work_dir to the container as /data and
         # we want the /data prefixed path to the file
@@ -98,7 +99,7 @@
     # If an outifle has been provided, then ensure that it is of type file, it is writeable, and
     # that it is open.
     if outfile:
-        assert isinstance(outfile, file), 'outfile was not passsed a file'
+        assert isinstance(outfile, IOBase), 'outfile was not passed a file'
         assert outfile.mode in ['w', 'a', 'wb', 'ab'], 'outfile not writeable'
         assert not outfile.closed, 'outfile is closed'
     # If the call is interactive, set intereactive to -i
@@ -110,7 +111,7 @@
         docker_tool = ''.join([dockerhub, '/', tool, ':', tool_version])
     # Get the docker image on the worker if needed
     call = ['docker', 'images']
-    dimg_rv = subprocess.check_output(call)
+    dimg_rv = subprocess.check_output(call).decode('utf-8')
     existing_images = [':'.join(x.split()[0:2]) for x in dimg_rv.splitlines()
                        if x.startswith(dockerhub)]
@@ -160,7 +161,7 @@
     return return_value
 
 
-def gunzip(input_gzip_file, block_size=1024):
+def gunzip(input_gzip_file, block_size=2048):
     """
     Gunzips the input file to the same directory
 
@@ -171,13 +172,8 @@
     assert os.path.splitext(input_gzip_file)[1] == '.gz'
     assert is_gzipfile(input_gzip_file)
     with gzip.open(input_gzip_file) as infile:
-        with open(os.path.splitext(input_gzip_file)[0], 'w') as outfile:
-            while True:
-                block = infile.read(block_size)
-                if block == '':
-                    break
-                else:
-                    outfile.write(block)
+        with open(os.path.splitext(input_gzip_file)[0], 'wb') as outfile:
+            shutil.copyfileobj(infile, outfile)
     return outfile.name
@@ -197,7 +193,7 @@
         'point to a file.'
     with open(filename, 'rb') as in_f:
         start_of_file = in_f.read(3)
-        if start_of_file == '\x1f\x8b\x08':
+        if start_of_file == b'\x1f\x8b\x08':
             return True
         else:
             return False
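Several changes above and below come from Python 3's strict bytes/str split and its new literal syntax: `subprocess.check_output` now returns `bytes` (hence `.decode('utf-8')`), data read in `'rb'` mode only compares equal to `bytes` literals (hence the `b'\x1f\x8b\x08'` gzip magic number), and octal literals must be written `0o755` rather than `0755` (see the `export_results` hunk below). A toy illustration:

    import subprocess

    out = subprocess.check_output(['echo', 'hello'])
    assert out == b'hello\n'        # bytes, not str
    text = out.decode('utf-8')      # explicit decode to str

    with open('/bin/sh', 'rb') as fh:   # any binary file works for the demo
        magic = fh.read(2)
    assert isinstance(magic, bytes)     # compare only against b'...' literals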
@@ -355,8 +351,8 @@ def get_file_from_url(job, any_url, encryption_key=None, per_file_encryption=Tru
     url = any_url
     parsed_url = urlparse(any_url)
     try:
-        response = urllib2.urlopen(url)
-    except urllib2.HTTPError:
+        response = urllib.request.urlopen(url)
+    except urllib.error.HTTPError:
         if parsed_url.netloc.startswith(('s3', 'S3')):
             job.fileStore.logToMaster("Detected https link is for an encrypted s3 file.")
             return get_file_from_s3(job, any_url, encryption_key=encryption_key,
@@ -431,7 +427,7 @@ def export_results(job, fsid, file_name, univ_options, subfolder=None):
     # Handle Local
     try:
         # Create the directory if required
-        os.makedirs(output_folder, 0755)
+        os.makedirs(output_folder, 0o755)
     except OSError as err:
         if err.errno != errno.EEXIST:
             raise
@@ -455,7 +451,7 @@ def delete_fastqs(job, patient_dict):
 
     :param dict patient_dict: Dict of list of input fastqs
     """
-    for key in patient_dict.keys():
+    for key in list(patient_dict.keys()):
         if 'fastq' not in key:
             continue
         job.fileStore.logToMaster('Deleting "%s:%s" ' % (patient_dict['patient_id'], key) +
@@ -472,10 +468,10 @@ def delete_bams(job, bams, patient_id):
     :param dict bams: Dict of bam and bai files
     :param str patient_id: The ID of the patient for logging purposes.
     """
-    bams = {b: v for b, v in bams.items()
+    bams = {b: v for b, v in list(bams.items())
             if (b.endswith('.bam') or b.endswith('.bai')) and v is not None}
     if bams:
-        for key, val in bams.items():
+        for key, val in list(bams.items()):
             job.fileStore.logToMaster('Deleting "%s" for patient "%s".'
                                       % (key, patient_id))
             job.fileStore.deleteGlobalFile(val)
     elif 'rna_genome' in bams:
@@ -600,7 +596,11 @@ def canonical_chrom_sorted(in_chroms):
     if 'MT' in in_chroms:
         in_chroms[in_chroms.index('MT')] = 'M'
         mt = True
-    in_chroms = sorted(in_chroms, key=lambda c: int(c) if c not in ('X', 'Y', 'M') else c)
+    num_in_chroms = sorted(filter(str.isnumeric, in_chroms),
+                           key=lambda c: int(c))
+    chr_in_chroms = sorted(filter(str.isalpha, in_chroms))
+    in_chroms = num_in_chroms.copy()
+    in_chroms.extend(chr_in_chroms)
     try:
         m_index = in_chroms.index('M')
     except ValueError:
@@ -656,4 +656,4 @@ def dummy_job(job, return_value):
     :param return_value: Any object of any pickle-able type
     :return: return_value
     """
-    return return_value
\ No newline at end of file
+    return return_value
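The `canonical_chrom_sorted` rewrite above works around another Python 3 change: `sorted` can no longer compare `int` and `str` sort keys in one pass, so numeric and alphabetic chromosome names are sorted separately and concatenated. A toy run of the same idea (illustrative list; the function itself then moves 'M' to the end):

    chroms = ['10', 'X', '2', '1', 'M', 'Y']

    nums = sorted(filter(str.isnumeric, chroms), key=int)   # ['1', '2', '10']
    alphas = sorted(filter(str.isalpha, chroms))            # ['M', 'X', 'Y']
    print(nums + alphas)  # ['1', '2', '10', 'M', 'X', 'Y']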
diff --git a/src/protect/expression_profiling/__init__.py b/src/protect/expression_profiling/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/expression_profiling/__init__.py
+++ b/src/protect/expression_profiling/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/expression_profiling/rsem.py b/src/protect/expression_profiling/rsem.py
index cc44699b..91c329d1 100644
--- a/src/protect/expression_profiling/rsem.py
+++ b/src/protect/expression_profiling/rsem.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from math import ceil
 from protect.common import (docker_call,
@@ -75,10 +75,10 @@ def run_rsem(job, rna_bam, univ_options, rsem_options):
     input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
     input_files['rsem_index'] = untargz(input_files['rsem_index.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['--paired-end',
-                  '-p', str(rsem_options['n']),
+                  '-p', str(20),
                   '--bam', input_files['star_transcriptome.bam'],
                   '--no-bam-output',
diff --git a/src/protect/haplotyping/__init__.py b/src/protect/haplotyping/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/haplotyping/__init__.py
+++ b/src/protect/haplotyping/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/haplotyping/phlat.py b/src/protect/haplotyping/phlat.py
index 1d34c9c1..43e7bdf8 100644
--- a/src/protect/haplotyping/phlat.py
+++ b/src/protect/haplotyping/phlat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from math import ceil
@@ -51,6 +51,7 @@ def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
                    'input_2.fastq': fastqs[1],
                    'phlat_index.tar.gz': phlat_options['index']}
     input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
+    print(input_files)
     # Handle gzipped files
     gz = '.gz' if is_gzipfile(input_files['input_1.fastq']) else ''
     if gz:
@@ -59,7 +60,7 @@
             input_files[read_file + gz] = input_files[read_file] + gz
     # Untar the index
     input_files['phlat_index'] = untargz(input_files['phlat_index.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['-1', input_files['input_1.fastq' + gz],
                   '-2', input_files['input_2.fastq' + gz],
@@ -68,7 +69,7 @@
                   '-tag', sample_type,
                   '-e', '/home/phlat-1.0',  # Phlat directory home
                   '-o', '/data',  # Output directory
-                  '-p', str(phlat_options['n'])]  # Number of threads
+                  '-p', str(10)]  # Number of threads
     docker_call(tool='phlat', tool_parameters=parameters, work_dir=work_dir,
                 dockerhub=univ_options['dockerhub'], tool_version=phlat_options['version'])
     output_file = job.fileStore.writeGlobalFile(''.join([work_dir, '/', sample_type, '_HLA.sum']))
@@ -182,13 +183,13 @@ def most_probable_alleles(allele_list):
         except KeyError:
             all_alleles[allele] = [float(pvalue)]
     # If there are less than 2 alleles, report all
-    if len(all_alleles.keys()) <= 2:
-        return all_alleles.keys()
+    if len(list(all_alleles.keys())) <= 2:
+        return list(all_alleles.keys())
     # Else, get the two with most evidence. Evidence is gauged by
     # a) How many files (of the 3) thought that Allele was present
     # b) In a tie, who has a lower avg p value
     # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is
     # a measure of the avg because avg = sum / n and n is equal in both of them.
     else:
-        return sorted(all_alleles.keys(),
+        return sorted(list(all_alleles.keys()),
                       key=lambda x: (-len(all_alleles[x]), sum(all_alleles[x])))[0:2]
diff --git a/src/protect/mutation_annotation/__init__.py b/src/protect/mutation_annotation/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/mutation_annotation/__init__.py
+++ b/src/protect/mutation_annotation/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/mutation_annotation/snpeff.py b/src/protect/mutation_annotation/snpeff.py
index 43ed03c0..ab54dc24 100644
--- a/src/protect/mutation_annotation/snpeff.py
+++ b/src/protect/mutation_annotation/snpeff.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from math import ceil
 from protect.common import (docker_call,
@@ -45,7 +45,7 @@ def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options):
                    'snpeff_index.tar.gz': snpeff_options['index']}
     input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
     input_files['snpeff_index'] = untargz(input_files['snpeff_index.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['eff',
                   '-dataDir', input_files['snpeff_index'],
diff --git a/src/protect/mutation_calling/__init__.py b/src/protect/mutation_calling/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/mutation_calling/__init__.py
+++ b/src/protect/mutation_calling/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/mutation_calling/common.py b/src/protect/mutation_calling/common.py
index 947426d2..cac561d4 100644
--- a/src/protect/mutation_calling/common.py
+++ b/src/protect/mutation_calling/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from protect.common import chrom_sorted, export_results, get_files_from_filestore, untargz
@@ -63,7 +63,7 @@ def run_mutation_aggregator(job, mutation_results, univ_options):
     """
     # Setup an input data structure for the merge function
     out = {}
-    for chrom in mutation_results['mutect'].keys():
+    for chrom in list(mutation_results['mutect'].keys()):
         out[chrom] = job.addChildJobFn(merge_perchrom_mutations, chrom, mutation_results,
                                        univ_options).rv()
     merged_snvs = job.addFollowOnJobFn(merge_perchrom_vcfs, out, 'merged', univ_options)
@@ -110,7 +110,7 @@ def merge_perchrom_mutations(job, chrom, mutations, univ_options):
     accepted_hits = defaultdict(dict)
 
-    for mut_type in vcf_processor.keys():
+    for mut_type in list(vcf_processor.keys()):
         # Get input files
         perchrom_mutations = {caller: vcf_processor[mut_type][caller](job, mutations[caller][chrom],
                                                                       work_dir, univ_options)
@@ -119,12 +119,12 @@
             perchrom_mutations['strelka'] = perchrom_mutations['strelka_' + mut_type]
             perchrom_mutations.pop('strelka_' + mut_type)
         # Read in each file to a dict
-        vcf_lists = {caller: read_vcf(vcf_file) for caller, vcf_file in perchrom_mutations.items()}
-        all_positions = list(set(itertools.chain(*vcf_lists.values())))
+        vcf_lists = {caller: read_vcf(vcf_file) for caller, vcf_file in list(perchrom_mutations.items())}
+        all_positions = list(set(itertools.chain(*list(vcf_lists.values()))))
         for position in sorted(all_positions):
-            hits = {caller: position in vcf_lists[caller] for caller in perchrom_mutations.keys()}
+            hits = {caller: position in vcf_lists[caller] for caller in list(perchrom_mutations.keys())}
             if sum(hits.values()) >= majority[mut_type]:
-                callers = ','.join([caller for caller, hit in hits.items() if hit])
+                callers = ','.join([caller for caller, hit in list(hits.items()) if hit])
                 assert position[1] not in accepted_hits[position[0]]
                 accepted_hits[position[0]][position[1]] = (position[2], position[3], callers)
@@ -133,7 +133,7 @@
     print('##INFO=')
@@ -297,7 +297,8 @@ def split_fusion_transcript(annotation_path, transcripts):
     forward = 'ACGTN'
     reverse = 'TGCAN'
-    trans = string.maketrans(forward, reverse)
+    # string.maketrans was removed; each type now has its own maketrans method
+    trans = str.maketrans(forward, reverse)
 
     # Pull in assembled transcript annotation
     five_pr_splits = collections.defaultdict(dict)
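`string.maketrans` is gone in Python 3; translation tables now come from the method on each type, here `str.maketrans`. A small reverse-complement sketch using the same table (the helper name is illustrative, not pipeline code):

    trans = str.maketrans('ACGTN', 'TGCAN')

    def reverse_complement(seq):
        # Complement each base via the table, then reverse the string
        return seq.translate(trans)[::-1]

    print(reverse_complement('ACCGT'))  # prints 'ACGGT'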
@@ -435,13 +436,13 @@ def reformat_star_fusion_output(job,
         score = 'Junction:%s-Spanning:%s' % (record.JunctionReadCount, record.SpanningFragCount)
 
         # Add empty sequences in case Trinity doesn't output one
-        if len(five_pr_splits[fusion].keys()) == 0:
+        if len(list(five_pr_splits[fusion].keys())) == 0:
             five_pr_splits[fusion]['N/A'] = '.'
-        if len(three_pr_splits[fusion].keys()) == 0:
+        if len(list(three_pr_splits[fusion].keys())) == 0:
             three_pr_splits[fusion]['N/A'] = '.'
-        for transcript_id in five_pr_splits[fusion].keys():
+        for transcript_id in list(five_pr_splits[fusion].keys()):
             five_prime_seq = five_pr_splits[fusion][transcript_id]
             three_prime_seq = three_pr_splits[fusion][transcript_id]
diff --git a/src/protect/mutation_calling/indel.py b/src/protect/mutation_calling/indel.py
index 7c8990da..fc05981a 100644
--- a/src/protect/mutation_calling/indel.py
+++ b/src/protect/mutation_calling/indel.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 
 
 def run_indel_caller(job, tumor_bam, normal_bam, univ_options, indel_options):
diff --git a/src/protect/mutation_calling/muse.py b/src/protect/mutation_calling/muse.py
index 28be1c6b..fc837067 100644
--- a/src/protect/mutation_calling/muse.py
+++ b/src/protect/mutation_calling/muse.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from collections import defaultdict
 from math import ceil
@@ -125,7 +125,7 @@ def run_muse_perchrom(job, tumor_bam, normal_bam, univ_options, muse_options, ch
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     output_prefix = os.path.join(work_dir, chrom)
@@ -162,9 +162,9 @@ def run_muse_sump_perchrom(job, muse_output, univ_options, muse_options, chrom):
     tbi = os.path.splitext(input_files['dbsnp_coding.vcf.gz.tbi.tmp'])[0]
     time.sleep(2)
     shutil.copy(input_files['dbsnp_coding.vcf.gz.tbi.tmp'], tbi)
-    os.chmod(tbi, 0777)
+    os.chmod(tbi, 0o777)
     open(tbi, 'a').close()
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     output_file = ''.join([work_dir, '/', chrom, '.vcf'])
     parameters = ['sump',
diff --git a/src/protect/mutation_calling/mutect.py b/src/protect/mutation_calling/mutect.py
index eb1a556f..9068b7b8 100644
--- a/src/protect/mutation_calling/mutect.py
+++ b/src/protect/mutation_calling/mutect.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from collections import defaultdict
 from math import ceil
@@ -105,7 +105,9 @@ def run_mutect_perchrom(job, tumor_bam, normal_bam, univ_options, mutect_options
     :return: fsID for the chromsome vcf
     :rtype: toil.fileStore.FileID
     """
+    work_dir = os.getcwd()
+
     input_files = {
         'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
         'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
@@ -119,13 +121,14 @@
         'dbsnp.vcf.gz': mutect_options['dbsnp_vcf'],
         'dbsnp.vcf.idx.tar.gz': mutect_options['dbsnp_idx']}
     input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
+    # dbsnp.vcf should be bgzipped, but all others should be tar.gz'd
     input_files['dbsnp.vcf'] = gunzip(input_files['dbsnp.vcf.gz'])
+    #input_files['dbsnp.vcf'] =
     for key in ('genome.fa', 'genome.fa.fai', 'genome.dict', 'cosmic.vcf', 'cosmic.vcf.idx',
                 'dbsnp.vcf.idx'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
-
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
     mutout = ''.join([work_dir, '/', chrom, '.out'])
     mutvcf = ''.join([work_dir, '/', chrom, '.vcf'])
     parameters = ['-R', input_files['genome.fa'],
diff --git a/src/protect/mutation_calling/radia.py b/src/protect/mutation_calling/radia.py
index 178bd8dd..9ba7b863 100644
--- a/src/protect/mutation_calling/radia.py
+++ b/src/protect/mutation_calling/radia.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from collections import defaultdict
 from math import ceil
@@ -87,7 +87,7 @@ def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
              +- 'chrM': fsID
     :rtype: dict
     """
-    if 'rna_genome' in rna_bam.keys():
+    if 'rna_genome' in list(rna_bam.keys()):
         rna_bam = rna_bam['rna_genome']
     elif set(rna_bam.keys()) == {'rna_genome_sorted.bam', 'rna_genome_sorted.bam.bai'}:
         pass
@@ -151,7 +151,7 @@ def run_radia_perchrom(job, bams, univ_options, radia_options, chrom):
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     radia_output = ''.join([work_dir, '/radia_', chrom, '.vcf'])
     radia_log = ''.join([work_dir, '/radia_', chrom, '_radia.log'])
@@ -214,7 +214,7 @@ def run_filter_radia(job, bams, radia_file, univ_options, radia_options, chrom):
     for key in ('cosmic_beds', 'dbsnp_beds', 'retrogene_beds', 'pseudogene_beds', 'gencode_beds'):
         input_files[key] = untargz(input_files[key], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     filterradia_log = ''.join([work_dir, '/radia_filtered_', chrom, '_radia.log'])
     parameters = [univ_options['patient'],  # shortID
diff --git a/src/protect/mutation_calling/somaticsniper.py b/src/protect/mutation_calling/somaticsniper.py
index ae148f04..42825439 100644
--- a/src/protect/mutation_calling/somaticsniper.py
+++ b/src/protect/mutation_calling/somaticsniper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from collections import defaultdict
 from math import ceil
@@ -146,7 +146,7 @@ def run_somaticsniper_full(job, tumor_bam, normal_bam, univ_options, somaticsnip
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     output_file = os.path.join(work_dir, 'somatic-sniper_full.vcf')
     parameters = ['-f', input_files['genome.fa'],
@@ -190,7 +190,7 @@ def filter_somaticsniper(job, tumor_bam, somaticsniper_output, tumor_pileup, uni
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     # Run snpfilter.pl
     parameters = ['snpfilter.pl',
@@ -278,7 +278,7 @@ def run_pileup(job, tumor_bam, univ_options, somaticsniper_options):
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['pileup',
                   '-cvi',
diff --git a/src/protect/mutation_calling/strelka.py b/src/protect/mutation_calling/strelka.py
index 0a1a0e7b..172e4691 100644
--- a/src/protect/mutation_calling/strelka.py
+++ b/src/protect/mutation_calling/strelka.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from math import ceil
 from protect.common import (docker_call,
@@ -130,7 +130,7 @@ def run_strelka_full(job, tumor_bam, normal_bam, univ_options, strelka_options):
     for key in ('genome.fa', 'genome.fa.fai', 'config.ini'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = [input_files['config.ini'],
                   input_files['tumor.bam'],
diff --git a/src/protect/mutation_translation.py b/src/protect/mutation_translation.py
index 68e76221..21f3ee1e 100644
--- a/src/protect/mutation_translation.py
+++ b/src/protect/mutation_translation.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from math import ceil
@@ -83,12 +83,12 @@ def run_transgene(job, snpeffed_file, rna_bam, univ_options, transgene_options,
     input_files['pepts.fa'] = untargz(input_files['pepts.fa.tar.gz'], work_dir)
     input_files['genome.fa'] = untargz(input_files['genome.fa.tar.gz'], work_dir)
     input_files['annotation.gtf'] = untargz(input_files['annotation.gtf.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['--peptides', input_files['pepts.fa'],
                   '--prefix', 'transgened',
                   '--pep_lens', '9,10,15',
-                  '--cores', str(transgene_options['n']),
+                  '--cores', str(20),
                   '--genome', input_files['genome.fa'],
                   '--annotation', input_files['annotation.gtf']]
@@ -107,7 +107,7 @@
         fusion_files = get_files_from_filestore(job, fusion_files, work_dir, docker=False)
         fusion_files['transcripts.fa'] = untargz(fusion_files['transcripts.fa.tar.gz'], work_dir)
-        fusion_files = {key: docker_path(path) for key, path in fusion_files.items()}
+        fusion_files = {key: docker_path(path) for key, path in list(fusion_files.items())}
 
         parameters += ['--transcripts', fusion_files['transcripts.fa'],
                        '--fusions', fusion_files['fusion_calls']]
diff --git a/src/protect/pipeline/ProTECT.py b/src/protect/pipeline/ProTECT.py
index 6b40fb79..c0ce828b 100644
--- a/src/protect/pipeline/ProTECT.py
+++ b/src/protect/pipeline/ProTECT.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -21,7 +21,7 @@
 Program info can be found in the docstring of the main function.
 Details can also be obtained by running the script with -h .
""" -from __future__ import print_function + from collections import defaultdict from multiprocessing import cpu_count @@ -160,7 +160,7 @@ def _add_default_entries(input_dict, defaults_dict): :return: updated dict :rtype: dict """ - for key, value in defaults_dict.iteritems(): + for key, value in defaults_dict.items(): if key == 'patients': print('Cannot default `patients`.') continue @@ -302,8 +302,8 @@ def parse_patients(job, patient_dict, skip_fusions=False): if f + '_fastq_2' not in output_dict: output_dict[f + '_fastq_2'] = get_fastq_2(job, patient_dict['patient_id'], f, output_dict[f + '_fastq_1']) - output_dict['gdc_inputs'] = [k for k, v in output_dict.items() if str(v).startswith('gdc')] - if not any('dna' in k for k in output_dict.keys()): + output_dict['gdc_inputs'] = [k for k, v in list(output_dict.items()) if str(v).startswith('gdc')] + if not any('dna' in k for k in list(output_dict.keys())): # There are no input DNA files so we cannot filter for oxog output_dict['filter_for_OxoG'] = False return output_dict @@ -354,7 +354,7 @@ def _parse_config_file(job, config_file, max_cores=None): # Flags to check for presence of encryption keys if required gdc_inputs = ssec_encrypted = False - for key in input_config.keys(): + for key in list(input_config.keys()): if key == 'patients': # Ensure each patient contains the required entries for sample_name in input_config[key]: @@ -410,7 +410,7 @@ def _parse_config_file(job, config_file, max_cores=None): if key == 'alignment': append_subgroup = ['post'] elif key == 'mutation_calling': - mutation_caller_list = input_config[key].keys() + mutation_caller_list = list(input_config[key].keys()) append_subgroup = [] else: append_subgroup = [] @@ -443,7 +443,7 @@ def parse_config_file(job, config_file, max_cores=None): sample_set, univ_options, processed_tool_inputs = _parse_config_file(job, config_file, max_cores) # Start a job for each sample in the sample set - for patient_id in sample_set.keys(): + for patient_id in list(sample_set.keys()): job.addFollowOnJobFn(launch_protect, sample_set[patient_id], univ_options, processed_tool_inputs) return None @@ -513,7 +513,7 @@ def launch_protect(job, patient_data, univ_options, tool_options): haplotype_patient = job.wrapJobFn(get_patient_mhc_haplotype, sample_prep.rv()) sample_prep.addChild(haplotype_patient) else: - assert None not in fastq_files.values() + assert None not in list(fastq_files.values()) # We are guaranteed to have fastqs here for sample_type in 'tumor_dna', 'normal_dna', 'tumor_rna': phlat_files[sample_type] = job.wrapJobFn( @@ -633,7 +633,7 @@ def launch_protect(job, patient_data, univ_options, tool_options): # Fusions have been handled above, and we don't need to align DNA get_mutations = None else: - assert (None, None) not in zip(fastq_files.values(), bam_files.values()) + assert (None, None) not in list(zip(list(fastq_files.values()), list(bam_files.values()))) for sample_type in 'tumor_dna', 'normal_dna': if bam_files[sample_type] is None: assert fastq_files[sample_type] is not None @@ -677,7 +677,7 @@ def launch_protect(job, patient_data, univ_options, tool_options): bam_files[sample_type].addChild(mutations[caller]) bam_files['tumor_rna'].addChild(mutations['radia']) get_mutations = job.wrapJobFn(run_mutation_aggregator, - {caller: cjob.rv() for caller, cjob in mutations.items()}, + {caller: cjob.rv() for caller, cjob in list(mutations.items())}, univ_options, disk='100M', memory='100M', cores=1).encapsulate() for caller in mutations: @@ -687,7 +687,6 @@ def launch_protect(job, 
@@ -687,7 +687,6 @@ def launch_protect(job, patient_data, univ_options, tool_options):
             # We may need the tumor one depending on OxoG
             if not patient_data['filter_for_OxoG']:
                 get_mutations.addChild(delete_bam_files['tumor_dna'])
-
     if get_mutations is not None:
         snpeff = job.wrapJobFn(run_snpeff, get_mutations.rv(), univ_options, tool_options['snpeff'],
                                disk=PromisedRequirement(snpeff_disk,
@@ -714,18 +713,15 @@
         transgene.addChild(delete_bam_files['tumor_dna'])
     if fusions:
         fusions.addChild(transgene)
-
     spawn_mhc = job.wrapJobFn(spawn_antigen_predictors, transgene.rv(), haplotype_patient.rv(),
                               univ_options, (tool_options['mhci'], tool_options['mhcii']),
                               disk='100M', memory='100M', cores=1).encapsulate()
     haplotype_patient.addChild(spawn_mhc)
     transgene.addChild(spawn_mhc)
-
     merge_mhc = job.wrapJobFn(merge_mhc_peptide_calls, spawn_mhc.rv(), transgene.rv(), univ_options,
                               disk='100M', memory='100M', cores=1)
     spawn_mhc.addFollowOn(merge_mhc)
     transgene.addChild(merge_mhc)
-
     rankboost = job.wrapJobFn(wrap_rankboost, rsem.rv(), merge_mhc.rv(), transgene.rv(),
                               univ_options, tool_options['rankboost'], disk='100M', memory='100M',
                               cores=1)
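For orientation, `launch_protect` above wires the pipeline together as a Toil job graph: `wrapJobFn` wraps a plain function into a job, `addChild`/`addFollowOn` impose ordering, and `rv()` is a promise for a return value resolved at run time. A minimal, self-contained sketch of the same pattern (toy functions; the jobStore path is an assumption):

    from toil.common import Toil
    from toil.job import Job

    def parent(job):
        child = job.addChildJobFn(produce)
        # child.rv() is a promise; consume() sees the real value when it runs
        job.addFollowOnJobFn(consume, child.rv())

    def produce(job):
        return 42

    def consume(job, value):
        job.fileStore.logToMaster('got %s' % value)

    if __name__ == '__main__':
        options = Job.Runner.getDefaultOptions('./toy_jobStore')  # assumed local job store
        with Toil(options) as toil:
            toil.start(Job.wrapJobFn(parent))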
diff --git a/src/protect/pipeline/__init__.py b/src/protect/pipeline/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/pipeline/__init__.py
+++ b/src/protect/pipeline/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/qc/__init__.py b/src/protect/qc/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/qc/__init__.py
+++ b/src/protect/qc/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/qc/rna.py b/src/protect/qc/rna.py
index 42334920..37b8ad99 100644
--- a/src/protect/qc/rna.py
+++ b/src/protect/qc/rna.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from math import ceil
 from protect.common import docker_call, docker_path, get_files_from_filestore, is_gzipfile
@@ -47,7 +47,7 @@ def run_cutadapt(job, fastqs, univ_options, cutadapt_options):
         for read_file in 'rna_1.fastq', 'rna_2.fastq':
             os.symlink(read_file, read_file + gz)
             input_files[read_file + gz] = input_files[read_file] + gz
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
     parameters = ['-a', cutadapt_options['a'],  # Fwd read 3' adapter
                   '-A', cutadapt_options['A'],  # Rev read 3' adapter
                   '-m', '35',  # Minimum size of read
diff --git a/src/protect/rankboost.py b/src/protect/rankboost.py
index bd9bf514..2c0b0a53 100644
--- a/src/protect/rankboost.py
+++ b/src/protect/rankboost.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
diff --git a/src/protect/test/__init__.py b/src/protect/test/__init__.py
index 382c581b..219056ee 100644
--- a/src/protect/test/__init__.py
+++ b/src/protect/test/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -16,7 +16,7 @@
 
 # A lot of this code was taken from toil/test/src/__init__.py
 
-from __future__ import absolute_import
+
 import logging
 import os
 import tempfile
diff --git a/src/protect/test/ci/__init__.py b/src/protect/test/ci/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/test/ci/__init__.py
+++ b/src/protect/test/ci/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/test/ci/test_protect.py b/src/protect/test/ci/test_protect.py
index 2fb3eae8..906160dc 100644
--- a/src/protect/test/ci/test_protect.py
+++ b/src/protect/test/ci/test_protect.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_reporting.py
 """
-from __future__ import print_function
+
 from protect.test import ProtectTest
@@ -173,8 +173,8 @@ def _test_ran_successfully(self, expected_dirs):
         expected_contents = {}
         for dir in expected_dirs:
             if isinstance(dir, dict):
-                assert len(dir.keys()) == 1
-                if dir.keys()[0] == 'mutations':
+                assert len(list(dir.keys())) == 1
+                if list(dir.keys())[0] == 'mutations':
                     expected_contents['mutations'] = ('/mnt/ephemeral/done/TEST/mutations',
                                                       sorted(dir['mutations']),
                                                       [])
@@ -198,7 +198,7 @@
                     else:
                         expected_contents['mutations_' + caller] = \
                             contents_per_dir['mutations'][caller]
-                elif dir.keys()[0] == 'alignments':
+                elif list(dir.keys())[0] == 'alignments':
                     alignment_files = []
                     for tissue_type in dir['alignments']:
                         alignment_files.extend(contents_per_dir['alignments'][tissue_type])
@@ -211,7 +211,7 @@
                 expected_contents[dir] = contents_per_dir[dir]
 
         expected_outputs = [('/mnt/ephemeral/done/TEST',
-                             sorted([x for x in expected_contents.keys()
+                             sorted([x for x in list(expected_contents.keys())
                                      if not x.startswith('mutations_')]),
                              [])]
         expected_outputs.extend([expected_contents[d] for d in sorted(expected_contents.keys())])
diff --git a/src/protect/test/unit/__init__.py b/src/protect/test/unit/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/test/unit/__init__.py
+++ b/src/protect/test/unit/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/test/unit/test_alignments.py b/src/protect/test/unit/test_alignments.py
index a8d2d3a8..0ea387eb 100644
--- a/src/protect/test/unit/test_alignments.py
+++ b/src/protect/test/unit/test_alignments.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_file_downloads.py
 """
-from __future__ import print_function
+
 import os
 import subprocess
@@ -75,7 +75,7 @@ def _get_test_bwa_files(job):
 
     :return: FSID for the rsem file
     """
-    base_call = 's3am download s3://cgl-pipeline-inputs/protect/ci_references/'
+    base_call = 's3am download S3://protect-data/hg38_references/'
     subprocess.check_call((base_call + 'Tum_1.fq.gz Tum_1.fq.gz').split(' '))
     subprocess.check_call((base_call + 'Tum_2.fq.gz Tum_2.fq.gz').split(' '))
     return [job.fileStore.writeGlobalFile('Tum_1.fq.gz'),
@@ -105,7 +105,7 @@ def _get_test_star_files(job):
 
     :return: FSID for the rsem file
     """
-    base_call = 's3am download s3://cgl-pipeline-inputs/protect/ci_references/'
+    base_call = 's3am download S3://protect-data/hg38_references/'
     subprocess.check_call((base_call + 'Rna_1.fq.gz Rna_1.fq.gz').split(' '))
     subprocess.check_call((base_call + 'Rna_2.fq.gz Rna_2.fq.gz').split(' '))
     return [job.fileStore.writeGlobalFile('Rna_1.fq.gz'),
diff --git a/src/protect/test/unit/test_file_downloads.py b/src/protect/test/unit/test_file_downloads.py
index 35994668..39f115a2 100644
--- a/src/protect/test/unit/test_file_downloads.py
+++ b/src/protect/test/unit/test_file_downloads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_file_downloads.py
 """
-from __future__ import print_function
+
 from protect.common import get_file_from_s3
 from protect.common import get_file_from_url
diff --git a/src/protect/test/unit/test_mutation_callers.py b/src/protect/test/unit/test_mutation_callers.py
index efa5d3e7..c30702da 100644
--- a/src/protect/test/unit/test_mutation_callers.py
+++ b/src/protect/test/unit/test_mutation_callers.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_file_downloads.py
 """
-from __future__ import print_function
+
 from protect.common import untargz
 from protect.mutation_calling.muse import run_muse
 from protect.mutation_calling.mutect import run_mutect
@@ -101,7 +101,7 @@ def test_star_fusion(self):
 def _get_fusion_options(job):
     star_fusion_options = {}
     fusion_inspector_options = {}
-    call = 's3am download S3://cgl-pipeline-inputs/protect/ci_references/ci_star_fusion_compatible_index.tar.gz ./index.tar.gz'
+    call = 's3am download S3://protect-data/hg38_references/ci_star_fusion_compatible_index.tar.gz ./index.tar.gz'
     subprocess.check_call(call.split(' '))
     star_fusion_options['index'] = fusion_inspector_options['index'] = job.fileStore.writeGlobalFile('index.tar.gz')
     # Trinity now sets a minimum for the number of reads, so don't run it
@@ -192,7 +192,7 @@ def _get_test_fusion_reads(job):
 
     :return: FSID for each paired FASTQ
     """
-    base_call = 's3am download S3://cgl-pipeline-inputs/protect/ci_references/'
+    base_call = 's3am download S3://protect-data/hg38_references/'
     samples = ['RNA_CD74_ROS1_1.fq.gz', 'RNA_CD74_ROS1_2.fq.gz']
     for sample in samples:
         call = '{base}{sample} ./{sample}'.format(base=base_call, sample=sample)
@@ -209,7 +209,7 @@ def _get_test_fusion_junction(job):
 
     :return: FSID for each paired FASTQ
     """
-    base_call = 's3am download S3://cgl-pipeline-inputs/protect/ci_references/'
+    base_call = 's3am download S3://protect-data/hg38_references/'
     sample = 'CD74_ROS1_Chimeric.out.junction'
     call = '{base}{sample} ./ChimericJunction'.format(base=base_call, sample=sample)
     subprocess.check_call(call.split(' '))
diff --git a/src/protect/test/unit/test_rankboost.py b/src/protect/test/unit/test_rankboost.py
index 0c271f19..442ae22b 100644
--- a/src/protect/test/unit/test_rankboost.py
+++ b/src/protect/test/unit/test_rankboost.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_rankboost.py
 """
-from __future__ import print_function
+
 from protect.pipeline.ProTECT import _parse_config_file
 from protect.rankboost import wrap_rankboost
diff --git a/src/protect/test/unit/test_reporting.py b/src/protect/test/unit/test_reporting.py
index 9c17fc91..00572421 100644
--- a/src/protect/test/unit/test_reporting.py
+++ b/src/protect/test/unit/test_reporting.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_reporting.py
 """
-from __future__ import print_function
+
 import os
diff --git a/src/protect/test/unit/test_snpeff.py b/src/protect/test/unit/test_snpeff.py
index 39381c55..c016123d 100644
--- a/src/protect/test/unit/test_snpeff.py
+++ b/src/protect/test/unit/test_snpeff.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_snpeff.py
 """
-from __future__ import print_function
+
 from protect.mutation_annotation.snpeff import run_snpeff
 from protect.pipeline.ProTECT import _parse_config_file
diff --git a/src/protect/test/unit/test_spawn_antigen_predictors.py b/src/protect/test/unit/test_spawn_antigen_predictors.py
index f00bb393..db3bfa5c 100644
--- a/src/protect/test/unit/test_spawn_antigen_predictors.py
+++ b/src/protect/test/unit/test_spawn_antigen_predictors.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_spawn_antigen_predictors.py
 """
-from __future__ import print_function
+
 from protect.binding_prediction.common import spawn_antigen_predictors, merge_mhc_peptide_calls
 from protect.pipeline.ProTECT import _parse_config_file
diff --git a/src/protect/test/unit/test_transgene b/src/protect/test/unit/test_transgene
index 8adfeaec..5a7ac91e 100644
--- a/src/protect/test/unit/test_transgene
+++ b/src/protect/test/unit/test_transgene
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
diff --git a/src/protect/version.py b/src/protect/version.py
index f0db84aa..b42ec478 100644
--- a/src/protect/version.py
+++ b/src/protect/version.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #