diff --git a/.gitignore b/.gitignore
index 3288e17f..bfab4c26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+bd2k-extras/
 pimmuno.py
 pimmuno_2.py
 *.pyc
@@ -5,6 +6,7 @@ pimmuno_2.py
 develop_data/
 venv/
 .cache/
+jobStore/
 test-report.xml
 __pycache__
 *.DONE
diff --git a/MANUAL.md b/MANUAL.md
index b023baad..2ef4b1eb 100644
--- a/MANUAL.md
+++ b/MANUAL.md
@@ -27,87 +27,55 @@ ProTECT is implemented in the [Toil](https://github.com/BD2KGenomics/toil.git) f
 runs the workflow described in [protect/Flowchart.txt](
 https://github.com/BD2KGenomics/protect/blob/master/Flowchart.txt).
 
+**This manual has been lightly adapted for the py3 port of ProTECT.**
+
 # Installation
 
 ProTECT requires Toil and we recommend installing ProTECT and its requirements in a
 [virtualenv](http://docs.python-guide.org/en/latest/dev/virtualenvs/).
 
-ProTECT also requires [s3am](https://github.com/BD2KGenomics/s3am.git) version 2.0.1 to download and
+~ProTECT also requires [s3am](https://github.com/BD2KGenomics/s3am.git) version 2.0.1 to download and
 upload files from S3. We recommend installing s3am in its own virtualenv using the directions in
 the s3am manual, then putting the s3am binary on your $PATH. ProTECT will NOT attempt to install
-s3am during installation.
+s3am during installation.~
 
-ProTECT uses pkg_resources from setuptools to verify versions of tools during install. As of setuptools
-39.0.1, some modules were moved to the packaging module. If your machine has setuptools >=39.0.1, you
-will need the packaging module.
+Currently a WIP: for now, **only references to local files will work**. Anything that requires
+access to s3am (S3 buckets) will **fail**.
 
 Lastly, ProTECT uses [docker](https://www.docker.com/) to run the various sub-tools in a
 reproducible, platform independent manner. ProTECT will NOT attempt to install docker during
 installation.
 
-### Method 1 - Using PIP (recommended)
-
-First create a virtualenv at your desired location (Here we create it in the folder ~/venvs)
-
-    virtualenv ~/venvs/protect
-
-Activate the virtualenv
-
-    source ~/venvs/protect/bin/activate
-
-NOTE: Installation was tested using pip 7.1.2 and 8.1.1. We have seen issues with the installation
-of pyYAML with lower versions of pip and recommend upgrading pip before installing ProTECT.
-
-    pip install --upgrade pip
-
-Install Toil
-
-    pip install toil[aws]==3.5.2
-
-Install packaging (required if setuptools>=39.0.1)
-
-    pip install packaging
-
-Install ProTECT and all dependencies in the virtualenv
-
-    pip install protect
-
+~Method 1 - Using PIP (recommended)~
 ### Method 2 - Installing from Source
 
 This will install ProTECT in an editable mode.
 
 Obtain the source from Github
 
-    git clone https://www.github.com/BD2KGenomics/protect.git
+    git clone https://www.github.com/Dranion/protect.git
 
 Create and activate a virtualenv in the project folder (Important since the Makefile checks for
 this and will fail if it detects that you are not in a virtual environment)
 
     cd protect
-    virtualenv venv
+    virtualenv --python=python3 venv
     source venv/bin/activate
 
 Install Toil and pytest
 
     make prepare
 
-Install packaging (required if setuptools>=39.0.1)
+Install the python3 conversions of bd2k-python-lib and s3am. *s3am is untested as I am running locally.*
 
-    pip install packaging
+    make special_install
 
 Install ProTECT
 
     make develop
 
-## Method 3 - Using Docker
+~Method 3 - Using Docker~
 
-Dockerized versions of ProTECT releases can be found at https://quay.io/organization/ucsc_cgl. These
-Docker containers run the ProTECT pipeline in single machine mode. The only difference between the
-Docker and Python versions of the pipeline is that the Docker container takes the config options,
-described below, as command line arguments as opposed to a config file. Running the container
-without any arguments will list all the available options. Also, currently the dockerized version of
-ProTECT only supports local file export.
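NOTE (py3 port aside): the virtualenv check that the Makefile performs (see the `check_venv` change in the Makefile diff below) had to change for Python 3, because the stdlib `venv` module marks an active environment differently than classic `virtualenv`. A minimal sketch of the detection logic, not the Makefile's exact invocation:

```python
import sys

def in_virtualenv():
    # Classic virtualenv records the original interpreter in sys.real_prefix;
    # the stdlib venv module (python3 -m venv) instead leaves sys.base_prefix
    # pointing at the base interpreter while sys.prefix points at the env.
    return hasattr(sys, 'real_prefix') or (
        hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)

if not in_virtualenv():
    sys.exit('A virtualenv must be active.')
```

The Makefile additionally falls back to testing the `VIRTUAL_ENV` environment variable, covering shells where only the activation script ran.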
 
 # Running ProTECT
 
@@ -173,7 +141,7 @@ in the pipeline, and the information on the input samples.
 Elements before a `:` dictionary read into ProTECT and should **NOT** be modified (Barring the
 patient ID key in the patients dictionary). Only values to the right of the `:` should be edited.
 
-Every required reference file is provided in the AWS bucket `cgl-pipeline-inputs` under the folder
+Every required reference file is provided in the AWS bucket `protect-data` under the folder
 `protect/hg19_references` or `protect/hg38_references`. The `README` file in the same location
 describes in detail how each file was generated. To use a file located in an s3 bucket, replace
 `/path/to` in the following descriptions with `s3:///`.
@@ -547,7 +515,7 @@ purposes:
     12: g/f/jobO4yiE4 return self.run(fileStore)
     13: g/f/jobO4yiE4 File "/home/ucsc/arjun/tools/dev/toil_clean/src/toil/job.py", line 1406, in run
     14: g/f/jobO4yiE4 rValue = userFunction(*((self,) + tuple(self._args)), **self._kwargs)
-    15: g/f/jobO4yiE4 File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python2.7/site-packages/protect/binding_prediction/common.py", line 566, in merge_mhc_peptide_calls
+    15: g/f/jobO4yiE4 File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python3/site-packages/protect/binding_prediction/common.py", line 566, in merge_mhc_peptide_calls
     16: g/f/jobO4yiE4 raise RuntimeError('No peptides available for ranking')
     17: g/f/jobO4yiE4 RuntimeError: No peptides available for ranking
     18: g/f/jobO4yiE4 ERROR:toil.worker:Exiting the worker because of a failed job on host sjcb10st7
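NOTE (py3 port aside): both this traceback and the RADIA one below bottom out in ProTECT's `docker_call` helper (`protect/common.py`, line 138 in the trace), which raises `RuntimeError` whenever a container exits non-zero. Conceptually the failure path reduces to the sketch below — a paraphrase for orientation, not the module's actual code:

```python
import subprocess

def docker_call(tool, parameters, work_dir):
    # Mount the work dir into the container and run the tool; a non-zero
    # exit status becomes the RuntimeError seen in these tracebacks.
    call = ['docker', 'run', '--rm=true', '-v', '%s:/data' % work_dir,
            '--log-driver=none', tool] + parameters
    return_value = subprocess.call(call)
    if return_value != 0:
        raise RuntimeError('docker command returned a non-zero exit status '
                           '(%s) for command "%s"' % (return_value, ' '.join(call)))
```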
@@ -581,9 +549,9 @@ do not store logs from tools (see BD2KGenomics/protect#275). The error looks similar to:
     Z/O/job1uH92D return self.run(fileStore)
     Z/O/job1uH92D File "/home/ucsc/arjun/tools/dev/toil_clean/src/toil/job.py", line 1406, in run
     Z/O/job1uH92D rValue = userFunction(*((self,) + tuple(self._args)), **self._kwargs)
-    Z/O/job1uH92D File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python2.7/site-packages/protect/mutation_calling/radia.py", line 238, in run_filter_radia
+    Z/O/job1uH92D File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python3/site-packages/protect/mutation_calling/radia.py", line 238, in run_filter_radia
     Z/O/job1uH92D tool_version=radia_options['version'])
-    Z/O/job1uH92D File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python2.7/site-packages/protect/common.py", line 138, in docker_call
+    Z/O/job1uH92D File "/home/ucsc/arjun/tools/protect_toil_clean/local/lib/python3/site-packages/protect/common.py", line 138, in docker_call
     Z/O/job1uH92D 'for command \"%s\"' % ' '.join(call),)
     Z/O/job1uH92D RuntimeError: docker command returned a non-zero exit status (1)for command "docker run --rm=true -v /scratch/bio/ucsc/toil-681c097c-61da-4687-b734-c5051f0aa19f/tmped2fnu/f041f939-5c0d-40be-a884-68635e929d09:/data --log-driver=none aarjunrao/filterradia:bcda721fc1f9c28d8b9224c2f95c440759cd3a03 TCGA-CH-5788 17 /data/radia.vcf /data /home/radia/scripts -d /data/radia_dbsnp -r /data/radia_retrogenes -p /data/radia_pseudogenes -c /data/radia_cosmic -t /data/radia_gencode --noSnpEff --noBlacklist --noTargets --noRnaBlacklist -f /data/hg38.fa --log=INFO -g /data/radia_filtered_chr17_radia.log"
     Z/O/job1uH92D ERROR:toil.worker:Exiting the worker because of a failed job on host sjcb10st1
diff --git a/Makefile b/Makefile
old mode 100644
new mode 100755
index a5a43cb2..e2e8f2d2
--- a/Makefile
+++ b/Makefile
@@ -45,17 +45,22 @@ help:
	@echo "$$help"
 
 
-python=python2.7
-pip=pip2.7
+python=python
+pip=pip
 tests=src/protect/test/unit
 extras=
-
 green=\033[0;32m
 normal=\033[0m
 red=\033[0;31m
+# WIP
+special_install: check_venv
+	git clone https://github.com/Dranion/bd2k-extras.git
+	make -C bd2k-extras/bd2k-python-lib develop
+	make -C bd2k-extras/s3am develop
+
 prepare: check_venv
-	@$(pip) install toil==3.8.0 pytest==2.8.3
+	@$(pip) install toil pytest
 
 develop: check_venv
	$(pip) install -e .$(extras)
@@ -107,11 +112,10 @@ clean_pypi:
 
 clean: clean_develop clean_sdist clean_pypi
 
-
 check_venv:
-	@$(python) -c 'import sys; sys.exit( int( not hasattr(sys, "real_prefix") ) )' \
-	|| ( echo "$(red)A virtualenv must be active.$(normal)" ; false )
-
+	@$(python) -c 'import sys; sys.exit( int( not (hasattr(sys, "real_prefix") or ( hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix ) ) ) )' \
+	|| [ ! -z "${VIRTUAL_ENV}" ] \
+	|| ( echo "$(red)A virtualenv must be active.$(normal)\n" ; false )
 
 check_clean_working_copy:
	@echo "$(green)Checking if your working copy is clean ...$(normal)"
diff --git a/README.md b/README.md
index c7306f5a..eaf1868f 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,10 @@
-[![Stories in Ready](https://badge.waffle.io/BD2KGenomics/protect.png?label=ready&title=Ready)](https://waffle.io/BD2KGenomics/protect)
 # ProTECT
 ### **Pr**ediction **o**f **T**-Cell **E**pitopes for **C**ancer **T**herapy
 
+Adaptation of ProTECT to use Python 3.8 instead of 2.7. A complete run has been tested using fastq files from [HCC1395 WGS Exome RNA Seq Data](https://github.com/genome/gms/wiki/HCC1395-WGS-Exome-RNA-Seq-Data), with identical results in both versions of Python.
+
+The adaptation was done using 2to3 and manual bug testing. Manual changes are recorded [in changes.md](changes.md). Since s3am is python2, **ProTECT currently works with local files only**; an untested python3 version of s3am exists [here](https://github.com/Dranion/bd2k-extras/tree/main). Continuing to the original README:
+
 This repo contains the Python libraries for the Precision Immunology Pipeline developed at UCSC.
 
     src/protect/pipeline/ProTECT.py             - The python script for running the pipeline.
@@ -20,6 +23,6 @@ All docker images used in this pipeline are available at
 
 To learn how the pipeline can be run on a sample, head over to the [ProTECT Manual](
-https://github.com/BD2KGenomics/protect/blob/master/MANUAL.md)
+https://github.com/Dranion/protect/blob/master/MANUAL.md)
 
 ProTECT is currently in its infancy and is under continuous development. We would appreciate users
 sharing the level 3 data produced by ProTECT with us such that we can better train our predictive
 models.
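Most of the mechanical edits in attic/ProTECT.py and the other converted files below are standard 2to3 rewrites: `from __future__ import print_function` dropped, `urlparse` moved to `urllib.parse`, `xrange` replaced by `range`, and `dict.keys()`/`.values()`/`.items()` wrapped in `list()`. The `list()` wrapping matters because Python 3 returns live views over the dict; iterating a view while mutating the dict raises a `RuntimeError`. A minimal illustration:

```python
sample_set = {'PRTCT-01': 'params', 'PRTCT-02': 'params'}

# Python 2: sample_set.keys() returned an independent list, so launching a
# job per patient while popping entries was safe.  Python 3: keys() is a
# view over the dict, so it must be copied with list() first.
for patient_id in list(sample_set.keys()):
    sample_set.pop(patient_id)
```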
diff --git a/attic/ProTECT.py b/attic/ProTECT.py
index d8ae8690..a3953145 100644
--- a/attic/ProTECT.py
+++ b/attic/ProTECT.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 Arjun Arkal Rao
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +20,7 @@ Program info can be found in the docstring of the main function.
 Details can also be obtained by running the script with -h .
 """
-from __future__ import print_function
+
 
 import argparse
 import errno
@@ -33,7 +33,7 @@ import time
 from collections import defaultdict, Counter
 from multiprocessing import cpu_count
-from urlparse import urlparse
+from urllib.parse import urlparse
 
 from pysam import Samfile
 
@@ -78,7 +78,7 @@ def parse_config_file(job, config_file):
     # along with it's parameters.
     for groupname, group_params in tool_specific_param_generator(job, conf):
         if groupname == 'patient':
-            if 'patient_id' not in group_params.keys():
+            if 'patient_id' not in list(group_params.keys()):
                 raise ParameterError('A patient group is missing the patient_id flag.')
             sample_set[group_params['patient_id']] = group_params
         elif groupname == 'Universal_Options':
@@ -104,7 +104,7 @@ def parse_config_file(job, config_file):
         raise ParameterError(' The following tools have no arguments in the config file : \n' +
                              '\n'.join(missing_tools))
     # Start a job for each sample in the sample set
-    for patient_id in sample_set.keys():
+    for patient_id in list(sample_set.keys()):
         job.addFollowOnJobFn(pipeline_launchpad, sample_set[patient_id], univ_options,
                              tool_options)
     return None
 
@@ -248,7 +248,7 @@ def delete_fastqs(job, fastqs):
                 +- 'normal_dna': [ , ]
     """
     for fq_type in ['tumor_rna', 'tumor_dna', 'normal_dna']:
-        for i in xrange(0,2):
+        for i in range(0,2):
             job.fileStore.deleteGlobalFile(fastqs[fq_type][i])
     return None
 
@@ -727,7 +727,7 @@ def spawn_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options
             'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai']}
     # Make a dict object to hold the return values for each of the chromosome jobs. Then run radia
     # on each chromosome.
- chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_radia = defaultdict() for chrom in chromosomes: perchrom_radia[chrom] = job.addChildJobFn(run_radia, bams, univ_options, radia_options, @@ -755,11 +755,11 @@ def merge_radia(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'radia_calls.vcf']), 'w') as radfile, \ open('/'.join([work_dir, 'radia_filter_passing_calls.vcf']), 'w') as radpassfile: for chrom in chromosomes: @@ -961,7 +961,7 @@ def spawn_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options): job.fileStore.logToMaster('Running spawn_mutect on %s' % univ_options['patient']) # Make a dict object to hold the return values for each of the chromosome # jobs. Then run mutect on each chromosome. - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_mutect = defaultdict() for chrom in chromosomes: perchrom_mutect[chrom] = job.addChildJobFn(run_mutect, tumor_bam, normal_bam, univ_options, @@ -987,10 +987,10 @@ def merge_mutect(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'mutect_calls.vcf']), 'w') as mutvcf, \ open('/'.join([work_dir, 'mutect_calls.out']), 'w') as mutout, \ open('/'.join([work_dir, 'mutect_passing_calls.vcf']), 'w') as mutpassvcf: @@ -1139,7 +1139,7 @@ def run_mutation_aggregator(job, fusion_output, radia_output, mutect_output, ind input_files.pop('fusion.vcf') # read files into memory vcf_file = defaultdict() - mutcallers = input_files.keys() + mutcallers = list(input_files.keys()) with open(''.join([work_dir, '/', univ_options['patient'], '_merged_mutations.vcf']), 'w') as merged_mut_file: for mut_caller in mutcallers: @@ -1571,8 +1571,8 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): mhci_files = get_files_from_filestore(job, mhci_preds, work_dir) # First split mhcii_preds into prediction files and predictors and maintain keys so we can later # reference them in pairs - mhcii_predictors = {x: y[1] for x, y in mhcii_preds.items()} - mhcii_files = {x: y[0] for x, y in mhcii_preds.items()} + 
mhcii_predictors = {x: y[1] for x, y in list(mhcii_preds.items())} + mhcii_files = {x: y[0] for x, y in list(mhcii_preds.items())} mhcii_files = get_files_from_filestore(job, mhcii_files, work_dir) # Get peptide files pept_files = get_files_from_filestore(job, pept_files, work_dir) @@ -1584,7 +1584,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): pepmap = json.load(mapfile) # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhci_merged_files.list']), 'w') as mhci_resfile: - for mhcifile in mhci_files.values(): + for mhcifile in list(mhci_files.values()): with open(mhcifile, 'r') as mf: for line in mf: # Skip header lines @@ -1605,7 +1605,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhcii_merged_files.list']), 'w') as \ mhcii_resfile: - for mhciifile in mhcii_files.keys(): + for mhciifile in list(mhcii_files.keys()): core_col = None # Variable to hold the column number with the core if mhcii_predictors[mhciifile] == 'Consensus': with open(mhcii_files[mhciifile], 'r') as mf: @@ -1814,7 +1814,7 @@ def prepare_samples(job, fastqs, univ_options): 'normal_dna_fastq_prefix'} if set(fastqs.keys()).difference(allowed_samples) != {'patient_id'}: raise ParameterError('Sample with the following parameters has an error:\n' + - '\n'.join(fastqs.values())) + '\n'.join(list(fastqs.values()))) # For each sample type, check if the prefix is an S3 link or a regular file # Download S3 files. for sample_type in ['tumor_dna', 'tumor_rna', 'normal_dna']: @@ -1877,7 +1877,7 @@ def get_files_from_filestore(job, files, work_dir, cache=True, docker=False): work_dir is the location where the file should be stored cache indiciates whether caching should be used """ - for name in files.keys(): + for name in list(files.keys()): outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]), cache=cache) # If the file pointed to a tarball, extract it to WORK_DIR if tarfile.is_tarfile(outfile) and file_xext(outfile).startswith('.tar'): @@ -1924,15 +1924,15 @@ def most_probable_alleles(allele_list): except KeyError: all_alleles[allele] = [float(pvalue)] # If there are less than 2 alleles, report all - if len(all_alleles.keys()) <= 2: - return all_alleles.keys() + if len(list(all_alleles.keys())) <= 2: + return list(all_alleles.keys()) # Else, get the two with most evidence. Evidence is gauged by # a) How many files (of the 3) thought that Allele was present # b) In a tie, who has a lower avg p value # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is # a measure of the avg because avg = sum / n and n is equal in both of them. else: - return sorted(all_alleles.keys(), key=lambda x: \ + return sorted(list(all_alleles.keys()), key=lambda x: \ (-len(all_alleles[x]), sum(all_alleles[x])))[0:2] @@ -2111,7 +2111,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile): """ allele, pept, pred, core = neoepitope_info - peptide_names = [x for x, y in peptides.items() if pept in y] + peptide_names = [x for x, y in list(peptides.items()) if pept in y] # For each peptide, append the ensembl gene for peptide_name in peptide_names: print(allele, pept, peptide_name, core, '0', pred, pepmap[peptide_name], sep='\t', @@ -2514,7 +2514,7 @@ def strip_xext(filepath): :return str filepath: Path to the file with the compression extension stripped off. 
""" ext_size = len(file_xext(filepath).split('.')) - 1 - for i in xrange(0, ext_size): + for i in range(0, ext_size): filepath = os.path.splitext(filepath)[0] return filepath diff --git a/attic/ProTECT_large.py b/attic/ProTECT_large.py index 40a7a200..485ea322 100644 --- a/attic/ProTECT_large.py +++ b/attic/ProTECT_large.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 Arjun Arkal Rao # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,7 +20,7 @@ Program info can be found in the docstring of the main function. Details can also be obtained by running the script with -h . """ -from __future__ import print_function + import argparse import base64 @@ -79,7 +79,7 @@ def parse_config_file(job, config_file): # along with it's parameters. for groupname, group_params in tool_specific_param_generator(job, conf): if groupname == 'patient': - if 'patient_id' not in group_params.keys(): + if 'patient_id' not in list(group_params.keys()): raise ParameterError('A patient group is missing the patient_id flag.') sample_set[group_params['patient_id']] = group_params elif groupname == 'Universal_Options': @@ -101,7 +101,7 @@ def parse_config_file(job, config_file): raise ParameterError(' The following tools have no arguments in the config file : \n' + '\n'.join(missing_tools)) # Start a job for each sample in the sample set - for patient_id in sample_set.keys(): + for patient_id in list(sample_set.keys()): job.addFollowOnJobFn(pipeline_launchpad, sample_set[patient_id], univ_options, tool_options) return None @@ -304,7 +304,7 @@ def delete_fastqs(job, fastqs): +- 'normal_dna': [ , ] """ for fq_type in ['tumor_rna', 'tumor_dna', 'normal_dna']: - for i in xrange(0,2): + for i in range(0,2): job.fileStore.deleteGlobalFile(fastqs[fq_type][i]) return None @@ -708,7 +708,7 @@ def spawn_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options 'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai']} # Make a dict object to hold the return values for each of the chromosome jobs. Then run radia # on each chromosome. 
- chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_radia = defaultdict() for chrom in chromosomes: perchrom_radia[chrom] = job.addChildJobFn(run_radia, bams, univ_options, radia_options, @@ -736,11 +736,11 @@ def merge_radia(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'radia_calls.vcf']), 'w') as radfile, \ open('/'.join([work_dir, 'radia_filter_passing_calls.vcf']), 'w') as radpassfile: for chrom in chromosomes: @@ -942,7 +942,7 @@ def spawn_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options): job.fileStore.logToMaster('Running spawn_mutect on %s' % univ_options['patient']) # Make a dict object to hold the return values for each of the chromosome # jobs. Then run mutect on each chromosome. - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_mutect = defaultdict() for chrom in chromosomes: perchrom_mutect[chrom] = job.addChildJobFn(run_mutect, tumor_bam, normal_bam, univ_options, @@ -968,10 +968,10 @@ def merge_mutect(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'mutect_calls.vcf']), 'w') as mutvcf, \ open('/'.join([work_dir, 'mutect_calls.out']), 'w') as mutout, \ open('/'.join([work_dir, 'mutect_passing_calls.vcf']), 'w') as mutpassvcf: @@ -1120,7 +1120,7 @@ def run_mutation_aggregator(job, fusion_output, radia_output, mutect_output, ind input_files.pop('fusion.vcf') # read files into memory vcf_file = defaultdict() - mutcallers = input_files.keys() + mutcallers = list(input_files.keys()) with open(''.join([work_dir, '/', univ_options['patient'], '_merged_mutations.vcf']), 'w') as merged_mut_file: for mut_caller in mutcallers: @@ -1566,8 +1566,8 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): mhci_files = get_files_from_filestore(job, mhci_preds, work_dir) # First split mhcii_preds into prediction files and predictors and maintain keys so we can later # reference them in pairs - mhcii_predictors = {x: y[1] for x, y in mhcii_preds.items()} - mhcii_files = {x: y[0] for x, y in mhcii_preds.items()} + 
mhcii_predictors = {x: y[1] for x, y in list(mhcii_preds.items())} + mhcii_files = {x: y[0] for x, y in list(mhcii_preds.items())} mhcii_files = get_files_from_filestore(job, mhcii_files, work_dir) # Get peptide files pept_files = get_files_from_filestore(job, pept_files, work_dir) @@ -1579,7 +1579,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): pepmap = json.load(mapfile) # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhci_merged_files.list']), 'w') as mhci_resfile: - for mhcifile in mhci_files.values(): + for mhcifile in list(mhci_files.values()): with open(mhcifile, 'r') as mf: for line in mf: # Skip header lines @@ -1600,7 +1600,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhcii_merged_files.list']), 'w') as \ mhcii_resfile: - for mhciifile in mhcii_files.keys(): + for mhciifile in list(mhcii_files.keys()): core_col = None # Variable to hold the column number with the core if mhcii_predictors[mhciifile] == 'Consensus': with open(mhcii_files[mhciifile], 'r') as mf: @@ -1807,7 +1807,7 @@ def prepare_samples(job, fastqs, univ_options): 'normal_dna_fastq_prefix'} if set(fastqs.keys()).difference(allowed_samples) != {'patient_id'}: raise ParameterError('Sample with the following parameters has an error:\n' + - '\n'.join(fastqs.values())) + '\n'.join(list(fastqs.values()))) # For each sample type, check if the prefix is an S3 link or a regular file # Download S3 files. for sample_type in ['tumor_dna', 'tumor_rna', 'normal_dna']: @@ -1869,7 +1869,7 @@ def get_files_from_filestore(job, files, work_dir, cache=True, docker=False): work_dir is the location where the file should be stored cache indiciates whether caching should be used """ - for name in files.keys(): + for name in list(files.keys()): outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]), cache=cache) # If the file pointed to a tarball, extract it to WORK_DIR if tarfile.is_tarfile(outfile) and file_xext(outfile).startswith('.tar'): @@ -1916,15 +1916,15 @@ def most_probable_alleles(allele_list): except KeyError: all_alleles[allele] = [float(pvalue)] # If there are less than 2 alleles, report all - if len(all_alleles.keys()) <= 2: - return all_alleles.keys() + if len(list(all_alleles.keys())) <= 2: + return list(all_alleles.keys()) # Else, get the two with most evidence. Evidence is gauged by # a) How many files (of the 3) thought that Allele was present # b) In a tie, who has a lower avg p value # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is # a measure of the avg because avg = sum / n and n is equal in both of them. else: - return sorted(all_alleles.keys(), key=lambda x: \ + return sorted(list(all_alleles.keys()), key=lambda x: \ (-len(all_alleles[x]), sum(all_alleles[x])))[0:2] @@ -2103,7 +2103,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile): """ allele, pept, pred, core = neoepitope_info - peptide_names = [x for x, y in peptides.items() if pept in y] + peptide_names = [x for x, y in list(peptides.items()) if pept in y] # For each peptide, append the ensembl gene for peptide_name in peptide_names: print(allele, pept, peptide_name, core, '0', pred, pepmap[peptide_name], sep='\t', @@ -2446,7 +2446,7 @@ def strip_xext(filepath): :return str filepath: Path to the file with the compression extension stripped off. 
""" ext_size = len(file_xext(filepath).split('.')) - 1 - for i in xrange(0, ext_size): + for i in range(0, ext_size): filepath = os.path.splitext(filepath)[0] return filepath diff --git a/attic/encrypt_files_in_dir_to_s3.py b/attic/encrypt_files_in_dir_to_s3.py index 67fac013..69bd0360 100644 --- a/attic/encrypt_files_in_dir_to_s3.py +++ b/attic/encrypt_files_in_dir_to_s3.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright (C) 2016 UCSC Computational Genomics Lab # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,7 +24,7 @@ Move files in a directory, or entire directory structures to S3 with (or without) encryption. """ -from __future__ import print_function + import argparse import base64 import hashlib diff --git a/attic/precision_immuno.py b/attic/precision_immuno.py index 73963b43..d958b8c2 100644 --- a/attic/precision_immuno.py +++ b/attic/precision_immuno.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 Arjun Arkal Rao # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,7 +20,7 @@ Program info can be found in the docstring of the main function. Details can also be obtained by running the script with -h . """ -from __future__ import print_function + import argparse import base64 @@ -77,7 +77,7 @@ def parse_config_file(job, config_file): # along with it's parameters. for groupname, group_params in tool_specific_param_generator(job, conf): if groupname == 'patient': - if 'patient_id' not in group_params.keys(): + if 'patient_id' not in list(group_params.keys()): raise ParameterError('A patient group is missing the patient_id flag.') sample_set[group_params['patient_id']] = group_params elif groupname == 'Universal_Options': @@ -99,7 +99,7 @@ def parse_config_file(job, config_file): raise ParameterError(' The following tools have no arguments in the config file : \n' + '\n'.join(missing_tools)) # Start a job for each sample in the sample set - for patient_id in sample_set.keys(): + for patient_id in list(sample_set.keys()): job.addFollowOnJobFn(pipeline_launchpad, sample_set[patient_id], univ_options, tool_options) return None @@ -296,7 +296,7 @@ def delete_fastqs(job, fastqs): +- 'normal_dna': [ , ] """ for fq_type in ['tumor_rna', 'tumor_dna', 'normal_dna']: - for i in xrange(0,2): + for i in range(0,2): job.fileStore.deleteGlobalFile(fastqs[fq_type][i]) return None @@ -685,7 +685,7 @@ def spawn_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options 'normal_dnai': normal_bam['normal_dna_fix_pg_sorted.bam.bai']} # Make a dict object to hold the return values for each of the chromosome jobs. Then run radia # on each chromosome. 
- chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_radia = defaultdict() for chrom in chromosomes: perchrom_radia[chrom] = job.addChildJobFn(run_radia, bams, univ_options, radia_options, @@ -710,11 +710,11 @@ def merge_radia(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'radia_calls.vcf']), 'w') as radfile, \ open('/'.join([work_dir, 'radia_filter_passing_calls.vcf']), 'w') as radpassfile: for chrom in chromosomes: @@ -909,7 +909,7 @@ def spawn_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options): job.fileStore.logToMaster('Running spawn_mutect on %s' % univ_options['patient']) # Make a dict object to hold the return values for each of the chromosome # jobs. Then run mutect on each chromosome. - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] perchrom_mutect = defaultdict() for chrom in chromosomes: perchrom_mutect[chrom] = job.addChildJobFn(run_mutect, tumor_bam, normal_bam, univ_options, @@ -932,10 +932,10 @@ def merge_mutect(job, perchrom_rvs): work_dir = job.fileStore.getLocalTempDir() # We need to squash the input dict of dicts to a single dict such that it can be passed to # get_files_from_filestore - input_files = {filename: jsid for perchrom_files in perchrom_rvs.values() - for filename, jsid in perchrom_files.items()} + input_files = {filename: jsid for perchrom_files in list(perchrom_rvs.values()) + for filename, jsid in list(perchrom_files.items())} input_files = get_files_from_filestore(job, input_files, work_dir, docker=False) - chromosomes = [''.join(['chr', str(x)]) for x in range(1, 23) + ['X', 'Y']] + chromosomes = [''.join(['chr', str(x)]) for x in list(range(1, 23)) + ['X', 'Y']] with open('/'.join([work_dir, 'mutect_calls.vcf']), 'w') as mutvcf, \ open('/'.join([work_dir, 'mutect_calls.out']), 'w') as mutout, \ open('/'.join([work_dir, 'mutect_passing_calls.vcf']), 'w') as mutpassvcf: @@ -1076,7 +1076,7 @@ def run_mutation_aggregator(job, fusion_output, radia_output, mutect_output, ind input_files.pop('fusion.vcf') # read files into memory vcf_file = defaultdict() - mutcallers = input_files.keys() + mutcallers = list(input_files.keys()) with open(''.join([work_dir, '/', univ_options['patient'], '_merged_mutations.vcf']), 'w') as merged_mut_file: for mut_caller in mutcallers: @@ -1502,8 +1502,8 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): mhci_files = get_files_from_filestore(job, mhci_preds, work_dir) # First split mhcii_preds into prediction files and predictors and maintain keys so we can later # reference them in pairs - mhcii_predictors = {x: y[1] for x, y in mhcii_preds.items()} - mhcii_files = {x: y[0] for x, y in mhcii_preds.items()} + 
mhcii_predictors = {x: y[1] for x, y in list(mhcii_preds.items())} + mhcii_files = {x: y[0] for x, y in list(mhcii_preds.items())} mhcii_files = get_files_from_filestore(job, mhcii_files, work_dir) # Get peptide files pept_files = get_files_from_filestore(job, pept_files, work_dir) @@ -1515,7 +1515,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): pepmap = json.load(mapfile) # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhci_merged_files.list']), 'w') as mhci_resfile: - for mhcifile in mhci_files.values(): + for mhcifile in list(mhci_files.values()): with open(mhcifile, 'r') as mf: for line in mf: # Skip header lines @@ -1536,7 +1536,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files): # Incorporate peptide names into the merged calls with open('/'.join([work_dir, 'mhcii_merged_files.list']), 'w') as \ mhcii_resfile: - for mhciifile in mhcii_files.keys(): + for mhciifile in list(mhcii_files.keys()): core_col = None # Variable to hold the column number with the core if mhcii_predictors[mhciifile] == 'Consensus': with open(mhcii_files[mhciifile], 'r') as mf: @@ -1740,7 +1740,7 @@ def prepare_samples(job, fastqs, univ_options): 'normal_dna_fastq_prefix'} if set(fastqs.keys()).difference(allowed_samples) != {'patient_id'}: raise ParameterError('Sample with the following parameters has an error:\n' + - '\n'.join(fastqs.values())) + '\n'.join(list(fastqs.values()))) # For each sample type, check if the prefix is an S3 link or a regular file # Download S3 files. for sample_type in ['tumor_dna', 'tumor_rna', 'normal_dna']: @@ -1800,7 +1800,7 @@ def get_files_from_filestore(job, files, work_dir, cache=True, docker=False): work_dir is the location where the file should be stored cache indiciates whether caching should be used """ - for name in files.keys(): + for name in list(files.keys()): outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]), cache=cache) # If the file pointed to a tarball, extract it to WORK_DIR if tarfile.is_tarfile(outfile) and file_xext(outfile).startswith('.tar'): @@ -1847,15 +1847,15 @@ def most_probable_alleles(allele_list): except KeyError: all_alleles[allele] = [float(pvalue)] # If there are less than 2 alleles, report all - if len(all_alleles.keys()) <= 2: - return all_alleles.keys() + if len(list(all_alleles.keys())) <= 2: + return list(all_alleles.keys()) # Else, get the two with most evidence. Evidence is gauged by # a) How many files (of the 3) thought that Allele was present # b) In a tie, who has a lower avg p value # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is # a measure of the avg because avg = sum / n and n is equal in both of them. 
     else:
-        return sorted(all_alleles.keys(), key=lambda x: \
+        return sorted(list(all_alleles.keys()), key=lambda x: \
             (-len(all_alleles[x]), sum(all_alleles[x])))[0:2]
 
 
@@ -2031,7 +2031,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile):
            'ensembl_gene\thugo_gene\tcomma_sep_transcript_mutations'
     """
     allele, pept, pred, core = neoepitope_info
-    peptide_names = [x for x, y in peptides.items() if pept in y]
+    peptide_names = [x for x, y in list(peptides.items()) if pept in y]
     # For each peptide, append the ensembl gene
     for peptide_name in peptide_names:
         print(allele, pept, peptide_name, core, '0', pred, pepmap[peptide_name], sep='\t',
@@ -2368,7 +2368,7 @@ def strip_xext(filepath):
     :return str filepath: Path to the file with the compression extension stripped off.
     """
     ext_size = len(file_xext(filepath).split('.')) - 1
-    for i in xrange(0, ext_size):
+    for i in range(0, ext_size):
         filepath = os.path.splitext(filepath)[0]
     return filepath
 
diff --git a/changes.md b/changes.md
new file mode 100644
index 00000000..1d7fa69c
--- /dev/null
+++ b/changes.md
@@ -0,0 +1,22 @@
+# Manual Changes to ProTECT
+:star: indicates changes to the algorithm that *could* potentially change results, though best efforts were made for a 1:1 conversion
+
+:black_square_button: indicates changes I hope to reverse; they are only 'temp fixes'
+
+- Originally ran 2to3 in commit a5d062fab68f8bbbebc2bbe9f4192b47b451146e
+  - removed explicit versioning in the [Makefile](https://github.com/BD2KGenomics/protect/commit/a5d062fab68f8bbbebc2bbe9f4192b47b451146e#diff-76ed074a9305c04054cdebb9e9aad2d818052b07091de1f20cad0bbac34ffb52) since the py3 version is still in dev
+- :black_square_button: [removed version checks in setup.py](https://github.com/BD2KGenomics/protect/commit/f04f22fb9f50270e5c0307d4a64aca0f3f7022d3) and obsolete setuptools
+  - [along with the setup version](https://github.com/BD2KGenomics/protect/commit/f70d3196198a2530406906b8af5a55b848aa0b14)
+- [changed default references](https://github.com/BD2KGenomics/protect/commit/c2fe3a8b8223682e6d63cccb4fccf0787227c525) from s3://cgl-pipeline-inputs to s3://protect-data
+  - this S3 bucket is pay-to-access; however, s3am is currently untested and was only converted automatically
+- :star::star: [common.py chromosome sorting](https://github.com/BD2KGenomics/protect/commit/b5ca956f3dfe05bf6714be8135cc90fe48140d98)
+- [docker image decodes to utf-8](https://github.com/BD2KGenomics/protect/commit/1d2bdb941548bdf4703113140d1f0758791bf88a)
+- [IOBase check rather than file](https://github.com/BD2KGenomics/protect/commit/351e855184ae218242a42e1aaa5781d22aba0511)
+- [some binary vs string adaptations](https://github.com/BD2KGenomics/protect/commit/5a4c50d1d2b8c71f3bc2f512f3679e80368044be#diff-e46b0e6e9cc33d9130334ab6994c9684b0972aaca58c889b6c1f4819751f1c79)
+- :star: [changed the obsolete ix to loc](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-3347ae223ced4e929cf7f273bf839bdeb219d82681f8e66a951d85cbeb079685)
+- :black_square_button: quick fix: ran into a problem with an '80.0' default for cores; I can't figure out where the default is coming from, so the following manual changes were made:
+  - [phlat cores to 10](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-7e85a3e4e9c911fded129ff48b2dd983d800c5190412f641eee85ff23ed9295c)
+  - [rsem cores to 20](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-1615337ffdbffe39413f26e4ccbb5309ed10b61987559df23ce6fc57cb5dd86a)
+  - [star fusion cores to 20](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-60e2cfd2feabfe71442d69d1d0d44ff293f8fe1e12aa74c3fe52101d5b32e60e)
+- [string.maketrans is obsolete; str.maketrans is better](https://github.com/BD2KGenomics/protect/commit/66bc12db0b815ab2099ee0174a06b923240322a4#diff-60e2cfd2feabfe71442d69d1d0d44ff293f8fe1e12aa74c3fe52101d5b32e60eR300)
+- changed the gunzip file write to use a library (faster?)
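On the `string.maketrans` bullet above: Python 3 removed the `string`-module function in favor of the `str.maketrans` static method. A small sketch of the replacement pattern (reverse-complementing a sequence is used here purely as an illustration, not necessarily ProTECT's exact call site):

```python
# Python 2:
#     from string import maketrans
#     table = maketrans('ACGT', 'TGCA')
table = str.maketrans('ACGT', 'TGCA')

print('GATTACA'.translate(table)[::-1])  # prints TGTAATC
```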
diff --git a/docker/pipelineWrapper.py b/docker/pipelineWrapper.py
index d767e41b..12502809 100644
--- a/docker/pipelineWrapper.py
+++ b/docker/pipelineWrapper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 
 import tarfile
 import argparse
@@ -103,7 +103,7 @@ def getName(fileName):
     # move individual files out
     for fileName in consistentNaming:
         shutil.copyfile(getName(fileName), os.path.join(output_dir, os.path.basename(fileName)))
-    for src, dst in renamingNeeded.iteritems():
+    for src, dst in renamingNeeded.items():
         if dst.endswith('.tar'):
             make_tar(getName(src), os.path.join(output_dir, dst))
         else:
diff --git a/docker/wrapper.py b/docker/wrapper.py
index c142f387..0d08c223 100644
--- a/docker/wrapper.py
+++ b/docker/wrapper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function + from pipelineWrapper import PipelineWrapperBuilder import logging import os @@ -437,20 +437,20 @@ def str2bool(v): help='Tabix index for dbsnp.gz.') parser.add_argument('--mhc-pathways-file', type=str, - default="S3://cgl-pipeline-inputs/protect/ci_references/" + default="S3://protect-data/hg38_references/" "mhc_pathways.tsv.tar.gz", help='JSON file containing the various genes in the MHC pathway' 'and their mean TPM expressions across samples in a background set.') parser.add_argument('--itx-resistance-file', type=str, - default="S3://cgl-pipeline-inputs/protect/ci_references/" + default="S3://protect-data/hg38_references/" "itx_resistance.tsv.tar.gz", help='') parser.add_argument('--immune-resistance-pathways-file', type=str, - default="S3://cgl-pipeline-inputs/protect/ci_references/" + default="S3://protect-data/hg38_references/" "immune_resistance_pathways.json.tar.gz", help='') parser.add_argument('--car-t-targets-file', type=str, - default="S3://cgl-pipeline-inputs/protect/ci_references/" + default="S3://protect-data/hg38_references/" "car_t_targets.tsv.tar.gzz", help='') diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 96cb4ebe..03c4a4b0 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -14,25 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License. from pkg_resources import parse_version -try: - from pkg_resources import SetuptoolsLegacyVersion as _LegacyVersion -except ImportError as e: - if 'SetuptoolsLegacyVersion' in e.message: - from packaging.version import LegacyVersion as _LegacyVersion - else: - raise +#try: +# from pkg_resources import SetuptoolsLegacyVersion as _LegacyVersion +#except ImportError as e: +# if 'SetuptoolsLegacyVersion' in e.message: +# from packaging.version import LegacyVersion as _LegacyVersion +# else: +# raise from setuptools import find_packages, setup from setuptools.command.test import test as TestCommand -from version import version +#from version import version import errno import subprocess import sys - -toil_version = '3.8.0' -s3am_version = '2.0.1' -gdc_version = 'v1.1.0' +#outdated for python3 +#toil_version = '3.8.0' +#s3am_version = '2.0.1' +#gdc_version = 'v1.1.0' def check_tool_version(tool, required_version, blacklisted_versions=None, binary=False): @@ -66,9 +66,9 @@ def check_tool_version(tool, required_version, blacklisted_versions=None, binary raise RuntimeError('Does %s have a version.py?' % tool) if type(parse_version(installed_version)) == _LegacyVersion: - print('Detecting that the installed version of "%s"(%s) is probably based off a git commit ' + print(('Detecting that the installed version of "%s"(%s) is probably based off a git commit ' 'and assuming this build is for testing purposes. If this is not the case, please ' - 'try again with a valid version of "%s".' % (tool, installed_version, tool)) + 'try again with a valid version of "%s".' 
% (tool, installed_version, tool))) elif parse_version(installed_version) < parse_version(required_version): raise RuntimeError('%s was detected to be version (%s) but ProTECT requires (%s)' % (tool, installed_version, required_version)) @@ -79,11 +79,11 @@ def check_tool_version(tool, required_version, blacklisted_versions=None, binary # Check Toil version -check_tool_version('toil', toil_version, binary=True) +#check_tool_version('toil', toil_version, binary=True) # Check S3am version -check_tool_version('s3am', s3am_version, binary=True) +#check_tool_version('s3am', s3am_version, binary=True) # Check gdc-client version -check_tool_version('gdc-client', gdc_version, binary=True, blacklisted_versions=['v1.2.0']) +#check_tool_version('gdc-client', gdc_version, binary=True, blacklisted_versions=['v1.2.0']) # Set up a test class @@ -109,7 +109,6 @@ def run_tests(self): setup(name='protect', - version=version, description='Prediction of T-Cell Epitopes for Cancer Therapy', url='http://github.com/BD2KGenomics/protect', author='Arjun Arkal Rao', @@ -117,10 +116,10 @@ def run_tests(self): license='Apache', install_requires=[ 'PyYAML', - 'pandas==0.19.2' + 'pandas' ], tests_require=[ - 'pytest==2.8.3'], + 'pytest'], test_suite='protect', entry_points={ 'console_scripts': [ diff --git a/src/__init__.py b/src/__init__.py index ba9b9130..dabfb0af 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import + diff --git a/src/protect/ProTECT_config.yaml b/src/protect/ProTECT_config.yaml new file mode 100644 index 00000000..239a42d0 --- /dev/null +++ b/src/protect/ProTECT_config.yaml @@ -0,0 +1,179 @@ +## Copyright 2016 UCSC Computational Genomics Lab +## Original contributor: Arjun Arkal Rao +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +#################################################################################################### +#################################################################################################### +## This is the input parameter file for the precision immuno pipeline. The parameters for each of +## the tools is provided here. The file is written in the YAML format. A nice description of the +## format can be found at http://docs.ansible.com/ansible/YAMLSyntax.html +## +## You can add comments anywhere in this file by prefixing it with a '#' +## +## Unless otherwise mentioned, all fields must be filled. 
+##
+####################################################################################################
+####################################################################################################
+
+# Any number of patients/samples can be listed here
+patients:
+    # Each group starts with the patient ID
+    PRTCT-01:
+        # The paths should point to the forward read of the pair. The pipeline assumes that the
+        # forward and reverse read files have the same prefix
+        tumor_dna_fastq_1: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_D1VCPACXX_1_R1.fastq
+        tumor_dna_fastq_2: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_D1VCPACXX_1_R2.fastq
+        normal_dna_fastq_1: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_D1VCPACXX_6_R1.fastq
+        normal_dna_fastq_2: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_D1VCPACXX_6_R2.fastq
+        tumor_rna_fastq_1: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_C1TD1ACXX_8_ACAGTG_R1.fastq
+        tumor_rna_fastq_2: /home/dranion/treehouse/gms/hcc1395_1tenth_percent/sort/gerald_C1TD1ACXX_8_ACAGTG_R2.fastq
+        tumor_type: SKCM
+        # ssec_encrypted: False
+        # filter_for_OxoG: False
+    # PRTCT-02:
+        # The paths can also be to directories on S3 as
+        #tumor_dna_fastq_1: S3://bucket/path/to/1.fastq.gz
+        #normal_dna_fastq_1: S3://bucket/path/to/1.fastq.gz
+        #tumor_rna_fastq_1: https://S3-.awsamazon.com/bucket/path/to/1.fastq.gz
+
+# These are options that are used by most tools
+Universal_Options:
+    dockerhub: aarjunrao
+    java_Xmx: 20G
+    reference_build: hg38 # Acceptable options are hg19, hg38, GRCh37, GRCh38
+    # sse_key: /path/to/master.key # Path to the AWS master key. Required if using AWS else optional
+    # sse_key_is_master: True # True or False. Required if using AWS else optional
+    # gdc_download_token: /path/to/token.txt # Path to the user's GDC download token.
+    storage_location: Local # Local or aws: for where the output must go
+    #storage_location: aws:protect-run-xyz
+    output_folder: /home/dranion/Flashdrive/project-results # Path to where the output must go.
+    #mail_to: test.email@host.com # Email for sending success report.
+
+
+# These options are for each module. You probably don't need to change any of this!
+alignment: + cutadapt: + a: AGATCGGAAGAG + A: AGATCGGAAGAG + # version: 1.9.1 + star: + type: star # use starlong if your reads are > 150bp + index: /home/dranion/Flashdrive/protect-data/star_with_fusion_100bp_readlen_indexes.tar.gz # Use star_without if you set star_fusion = False + # version: 2.5.2b + bwa: + index: /home/dranion/Flashdrive/protect-data/bwa_index.tar.gz + # version: 0.7.9a + post: + samtools: + # version: 1.2 + picard: + # version: 1.135 + +expression_estimation: + rsem: + index: /home/dranion/Flashdrive/protect-data/rsem_index.tar.gz + # version: 1.2.0 + +mutation_calling: + indexes: + chromosomes: canonical_chr, chrM + genome_fasta: /home/dranion/Flashdrive/protect-data/hg38.fa.tar.gz + genome_fai: /home/dranion/Flashdrive/protect-data/hg38.fa.fai.tar.gz + genome_dict: /home/dranion/Flashdrive/protect-data/hg38.dict.tar.gz + cosmic_vcf: /home/dranion/Flashdrive/protect-data/CosmicCodingMuts.vcf.tar.gz + cosmic_idx: /home/dranion/Flashdrive/protect-data/CosmicCodingMuts.vcf.idx.tar.gz + dbsnp_vcf: /home/dranion/Flashdrive/protect-data/dbsnp_coding.vcf.gz + dbsnp_idx: /home/dranion/Flashdrive/protect-data/dbsnp_coding.vcf.idx.tar.gz + dbsnp_tbi: /home/dranion/Flashdrive/protect-data/dbsnp_coding.vcf.gz.tbi + mutect: + java_Xmx: 2G + # version: 1.1.7 + muse: + # version: 1.0rc_submission_b391201 + radia: + cosmic_beds: /home/dranion/Flashdrive/protect-data/radia_cosmic.tar.gz + dbsnp_beds: /home/dranion/Flashdrive/protect-data/radia_dbsnp.tar.gz + retrogene_beds: /home/dranion/Flashdrive/protect-data/radia_retrogenes.tar.gz + pseudogene_beds: /home/dranion/Flashdrive/protect-data/radia_pseudogenes.tar.gz + gencode_beds: /home/dranion/Flashdrive/protect-data/radia_gencode.tar.gz + # version: 398366ef07b5911d8082ed61cbf03d487a41f286 + somaticsniper: + # version: 1.0.4 + samtools: + # version: 0.1.8 + bam_readcount: + # version: 0.7.4 + star_fusion: + #run: True + #version: 1.0.0 + fusion_inspector: + #run_trinity: True + #version: 1.0.1 + strelka: + # version: 1.0.15 + config_file: /home/dranion/Flashdrive/protect-data/strelka_bwa_WXS_config.ini.tar.gz + + +mutation_annotation: + snpeff: + index: /home/dranion/Flashdrive/protect-data/snpeff_index.tar.gz + # version: 3.6 + java_Xmx: 20G + +mutation_translation: + transgene: + gencode_peptide_fasta : /home/dranion/Flashdrive/protect-data/gencode.v25.pc_translations_NOPARY.fa.tar.gz + gencode_transcript_fasta : /home/dranion/Flashdrive/protect-data/gencode.v25.pc_transcripts_NOPARY.fa.tar.gz + gencode_annotation_gtf : /home/dranion/Flashdrive/protect-data/gencode.v25.annotation_NOPARY.gtf.tar.gz + genome_fasta : /home/dranion/Flashdrive/protect-data/hg38.fa.tar.gz + # version: 2.2.2 + +haplotyping: + phlat: + index: /home/dranion/Flashdrive/protect-data/phlat_index.tar.gz + # version: 1.0 + +mhc_peptide_binding: + mhci: + method_file: /home/dranion/Flashdrive/protect-data/mhci_restrictions.json.tar.gz + pred: IEDB_recommended + # version: 2.13 + mhcii: + method_file: /home/dranion/Flashdrive/protect-data/mhcii_restrictions.json.tar.gz + pred: IEDB_recommended + # version: 2.13 + netmhciipan: + # version: 3.1 + +prediction_ranking: + rankboost: + mhci_args: + npa: 0.0 + nph: 0.0 + nMHC: 0.32 + TPM: 0.0 + overlap: 0.68 + tndelta: 0.0 + mhcii_args: + npa: 0.2 + nph: 0.2 + nMHC: 0.2 + TPM: 0.2 + tndelta: 0.2 + # version: 2.0.3 + +reports: + mhc_pathways_file: /home/dranion/Flashdrive/protect-data/mhc_pathways.tsv.tar.gz + itx_resistance_file: /home/dranion/Flashdrive/protect-data/itx_resistance.tsv.tar.gz + 
immune_resistance_pathways_file: /home/dranion/Flashdrive/protect-data/immune_resistance_pathways.json.tar.gz + car_t_targets_file: /home/dranion/Flashdrive/protect-data/car_t_targets.tsv.tar.gz diff --git a/src/protect/__init__.py b/src/protect/__init__.py index ba9b9130..dabfb0af 100644 --- a/src/protect/__init__.py +++ b/src/protect/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import + diff --git a/src/protect/addons/__init__.py b/src/protect/addons/__init__.py index ba9b9130..dabfb0af 100644 --- a/src/protect/addons/__init__.py +++ b/src/protect/addons/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import + diff --git a/src/protect/addons/assess_car_t_validity.py b/src/protect/addons/assess_car_t_validity.py index f28a2de4..e49a4469 100644 --- a/src/protect/addons/assess_car_t_validity.py +++ b/src/protect/addons/assess_car_t_validity.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function + from protect.addons.common import TCGAToGTEx from protect.common import export_results, get_files_from_filestore, untargz from protect.haplotyping.phlat import parse_phlat_file diff --git a/src/protect/addons/assess_immunotherapy_resistance.py b/src/protect/addons/assess_immunotherapy_resistance.py index 568aacf9..6f96530b 100644 --- a/src/protect/addons/assess_immunotherapy_resistance.py +++ b/src/protect/addons/assess_immunotherapy_resistance.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function + from protect.addons.common import TCGAToGTEx from protect.common import export_results, get_files_from_filestore, untargz diff --git a/src/protect/addons/assess_mhc_pathway.py b/src/protect/addons/assess_mhc_pathway.py index 149df4ba..5e152082 100644 --- a/src/protect/addons/assess_mhc_pathway.py +++ b/src/protect/addons/assess_mhc_pathway.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python3 # Copyright 2016 UCSC Computational Genomics Lab # Original contributor: Arjun Arkal Rao # @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import print_function
+
 from collections import Counter
 from protect.addons.common import TCGAToGTEx
 from protect.common import export_results, get_files_from_filestore, untargz
@@ -81,7 +81,7 @@ def assess_mhc_genes(job, gene_expression, rna_haplotype, univ_options, reports_
     # Read the patient gene values into a dictionary
     gene_expressions = pd.read_table(input_files['rsem_quant.tsv'], index_col=0, header=0)
-    gene_expressions = Counter({x.split('.')[0]: y for x, y in gene_expressions['TPM'].to_dict().items()})
+    gene_expressions = Counter({x.split('.')[0]: y for x, y in list(gene_expressions['TPM'].to_dict().items())})
     # Print the report
     roles = {x for x in background_df['Roles'].values if ',' not in x}
     with open('mhc_pathway_report.txt', 'w') as mpr:
@@ -119,7 +119,9 @@ def assess_mhc_genes(job, gene_expression, rna_haplotype, univ_options, reports_
                 result, 2, result), file=mpr)
             for ensg in role_df.index:
-                ensgName = background_df.ix[ensg, 'Name']
+                # .ix was deprecated in pandas 0.20.0
+                # ensgName = background_df.ix[ensg, 'Name']
+                ensgName = background_df.loc[ensg, 'Name']
                 b_vals = {}
                 for bkg in b_types:
                     val = "{0:.2f}".format(role_df.loc[ensg].get(b_types[bkg], default='NA'))
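`DataFrame.ix` was deprecated in pandas 0.20.0 and later removed, hence the switch to label-based `.loc` above. A toy illustration (made-up frame, not pipeline data):

    import pandas as pd

    df = pd.DataFrame({'Name': ['HLA-A', 'TAP1']}, index=['ENSG01', 'ENSG02'])
    name = df.loc['ENSG01', 'Name']  # label-based lookup, replaces df.ix['ENSG01', 'Name']
    first = df.iloc[0, 0]            # positional lookup, the other .ix replacement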
diff --git a/src/protect/addons/common.py b/src/protect/addons/common.py
index 8566ae83..5dc8be99 100644
--- a/src/protect/addons/common.py
+++ b/src/protect/addons/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 
 from collections import Counter
diff --git a/src/protect/alignment/__init__.py b/src/protect/alignment/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/alignment/__init__.py
+++ b/src/protect/alignment/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/alignment/common.py b/src/protect/alignment/common.py
index b959fc37..70cb72a0 100644
--- a/src/protect/alignment/common.py
+++ b/src/protect/alignment/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
 from math import ceil
 from protect.common import docker_call, docker_path, export_results, get_files_from_filestore
diff --git a/src/protect/alignment/dna.py b/src/protect/alignment/dna.py
index 105fa3d8..767065a1 100644
--- a/src/protect/alignment/dna.py
+++ b/src/protect/alignment/dna.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from math import ceil
 from protect.alignment.common import index_bamfile, index_disk
@@ -128,7 +128,7 @@ def run_bwa(job, fastqs, sample_type, univ_options, bwa_options):
         input_files[read_file + gz] = input_files[read_file] + gz
     # Untar the index
     input_files['bwa_index'] = untargz(input_files['bwa_index.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['mem',
                   '-t', str(bwa_options['n']),
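A pattern worth calling out once, since it recurs in almost every hunk below: Python 3's `dict.keys()`, `.values()`, and `.items()` return live views rather than lists, so they cannot be indexed and raise `RuntimeError` if the dict changes size during iteration; wrapping them in `list(...)` restores the Python 2 semantics. A toy illustration (hypothetical dict, not pipeline code):

    d = {'tumor_dna': 1, 'normal_dna': 2}

    # d.keys()[0] would raise TypeError: 'dict_keys' object is not subscriptable
    first = list(d.keys())[0]

    # Materializing the view also makes it safe to mutate the dict mid-loop:
    for key in list(d.keys()):
        del d[key]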
diff --git a/src/protect/alignment/rna.py b/src/protect/alignment/rna.py
index fe8fe86b..aeb38320 100644
--- a/src/protect/alignment/rna.py
+++ b/src/protect/alignment/rna.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from math import ceil
@@ -94,7 +94,7 @@ def run_star(job, fastqs, univ_options, star_options):
     if os.path.exists(star_fusion_idx):
         input_files['star_index'] = star_fusion_idx
 
-    input_files = {key: docker_path(path, work_dir=work_dir) for key, path in input_files.items()}
+    input_files = {key: docker_path(path, work_dir=work_dir) for key, path in list(input_files.items())}
 
     # Using recommended STAR-Fusion parameters:
     # https://github.com/STAR-Fusion/STAR-Fusion/wiki
diff --git a/src/protect/binding_prediction/__init__.py b/src/protect/binding_prediction/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/binding_prediction/__init__.py
+++ b/src/protect/binding_prediction/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/binding_prediction/common.py b/src/protect/binding_prediction/common.py
index 3bead87e..2c58a9e9 100644
--- a/src/protect/binding_prediction/common.py
+++ b/src/protect/binding_prediction/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from protect.binding_prediction.mhci import predict_mhci_binding
@@ -108,7 +108,7 @@ def spawn_antigen_predictors(job, transgened_files, phlat_files, univ_options, m
             mhci_preds[(allele, peplen)] = mhci_job.addChildJobFn(
                 predict_normal_binding,
                 mhci_job.rv(),
-                {x: y for x, y in pept_files.items() if peplen in x},
+                {x: y for x, y in list(pept_files.items()) if peplen in x},
                 allele,
                 peplen,
                 univ_options,
@@ -125,7 +125,7 @@
             mhcii_preds[(allele, 15)] = mhcii_job.addFollowOnJobFn(
                 predict_normal_binding,
                 mhcii_job.rv(),
-                {x: y for x, y in pept_files.items() if '15' in x},
+                {x: y for x, y in list(pept_files.items()) if '15' in x},
                 allele,
                 '15',
                 univ_options,
@@ -145,8 +145,8 @@ def read_fastas(input_files):
     :return: The read fastas in a dictionary of tuples
     :rtype: dict
     """
-    tumor_file = [y for x, y in input_files.items() if x.startswith('T')][0]
-    normal_file = [y for x, y in input_files.items() if x.startswith('N')][0]
+    tumor_file = [y for x, y in list(input_files.items()) if x.startswith('T')][0]
+    normal_file = [y for x, y in list(input_files.items()) if x.startswith('N')][0]
     output_files = defaultdict(list)
     output_files = _read_fasta(tumor_file, output_files)
     num_entries = len(output_files)
@@ -343,7 +343,7 @@ def _get_normal_peptides(job, mhc_df, iars, peplen):
     peplen = int(peplen)
     normal_peptides = []
     for pred in mhc_df.itertuples():
-        containing_iars = [i for i, sl in iars.items() if pred.pept in sl[0]]
+        containing_iars = [i for i, sl in list(iars.items()) if pred.pept in sl[0]]
         assert len(containing_iars) != 0, "No IARS contained the peptide"
         if len(iars[containing_iars[0]]) == 1:
             # This is a fusion and has no corresponding normal
@@ -351,7 +351,7 @@
         else:
             # If there are multiple IARs, they all or none of them have to have a corresponding
             # normal.
-            if len(set([len(y) for x, y in iars.items() if x in containing_iars])) != 1:
+            if len(set([len(y) for x, y in list(iars.items()) if x in containing_iars])) != 1:
                 job.fileStore.logToMaster('Some IARS were found to contain the substring but were'
                                           'inconsistent with the presence of a corresponding '
                                           'normal.')
@@ -594,7 +594,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile, netmhc=False):
     if netmhc:
         peptide_names = [neoepitope_info.peptide_name]
     else:
-        peptide_names = [x for x, y in peptides.items() if neoepitope_info.pept in y]
+        peptide_names = [x for x, y in list(peptides.items()) if neoepitope_info.pept in y]
     # Convert named tuple to dict so it can be modified
     neoepitope_info = neoepitope_info._asdict()
     # Handle fusion peptides (They are characterized by having all N's as the normal partner)
diff --git a/src/protect/binding_prediction/mhci.py b/src/protect/binding_prediction/mhci.py
index 2708771d..f5679fbd 100644
--- a/src/protect/binding_prediction/mhci.py
+++ b/src/protect/binding_prediction/mhci.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from protect.common import docker_call, get_files_from_filestore, read_peptide_file
diff --git a/src/protect/binding_prediction/mhcii.py b/src/protect/binding_prediction/mhcii.py
index 3e6f93a0..91db1f62 100644
--- a/src/protect/binding_prediction/mhcii.py
+++ b/src/protect/binding_prediction/mhcii.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from protect.common import docker_call, get_files_from_filestore, read_peptide_file
diff --git a/src/protect/common.py b/src/protect/common.py
old mode 100644
new mode 100755
index 17d72b5e..467ca016
--- a/src/protect/common.py
+++ b/src/protect/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -21,16 +21,17 @@
 Program info can be found in the docstring of the main function.
 Details can also be obtained by running the script with -h .
 """
-from __future__ import print_function
+
 from collections import defaultdict
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from urlparse import urlparse
-
+from urllib.parse import urlparse
+from io import IOBase
 import errno
 import gzip
 import logging
+import shutil
 import os
 import re
 import smtplib
@@ -38,7 +39,7 @@
 import subprocess
 import sys
 import tarfile
-import urllib2
+import urllib.request, urllib.error, urllib.parse
 import uuid
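`urlparse` and `urllib2` do not exist in Python 3; their contents moved into `urllib.parse`, `urllib.request`, and `urllib.error`, as the import hunk above shows. A minimal sketch of the equivalent calls (the URL is a placeholder):

    from urllib.error import HTTPError
    from urllib.parse import urlparse
    from urllib.request import urlopen

    url = 'https://example.com/references/hg38.fa.tar.gz'  # placeholder URL
    parsed_url = urlparse(url)  # exposes .scheme, .netloc, .path, ...
    try:
        response = urlopen(url)
    except HTTPError as err:
        print('download failed with HTTP status', err.code)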
@@ -53,7 +54,7 @@ def get_files_from_filestore(job, files, work_dir, docker=False):
     :return: Dict of files: (optionallly docker-friendly) fileepaths
     :rtype: dict
     """
-    for name in files.keys():
+    for name in list(files.keys()):
         outfile = job.fileStore.readGlobalFile(files[name], '/'.join([work_dir, name]))
         # If the files will be sent to docker, we will mount work_dir to the container as /data and
         # we want the /data prefixed path to the file
@@ -98,7 +99,7 @@
     # If an outifle has been provided, then ensure that it is of type file, it is writeable, and
     # that it is open.
     if outfile:
-        assert isinstance(outfile, file), 'outfile was not passsed a file'
+        assert isinstance(outfile, IOBase), 'outfile was not passed a file'
         assert outfile.mode in ['w', 'a', 'wb', 'ab'], 'outfile not writeable'
         assert not outfile.closed, 'outfile is closed'
     # If the call is interactive, set intereactive to -i
@@ -110,7 +111,7 @@
         docker_tool = ''.join([dockerhub, '/', tool, ':', tool_version])
     # Get the docker image on the worker if needed
     call = ['docker', 'images']
-    dimg_rv = subprocess.check_output(call)
+    dimg_rv = subprocess.check_output(call).decode('utf-8')
     existing_images = [':'.join(x.split()[0:2]) for x in dimg_rv.splitlines()
                        if x.startswith(dockerhub)]
@@ -160,7 +161,7 @@
     return return_value
 
 
-def gunzip(input_gzip_file, block_size=1024):
+def gunzip(input_gzip_file, block_size=2048):
     """
     Gunzips the input file to the same directory
 
@@ -171,13 +172,8 @@
     assert os.path.splitext(input_gzip_file)[1] == '.gz'
     assert is_gzipfile(input_gzip_file)
     with gzip.open(input_gzip_file) as infile:
-        with open(os.path.splitext(input_gzip_file)[0], 'w') as outfile:
-            while True:
-                block = infile.read(block_size)
-                if block == '':
-                    break
-                else:
-                    outfile.write(block)
+        with open(os.path.splitext(input_gzip_file)[0], 'wb') as outfile:
+            shutil.copyfileobj(infile, outfile)
     return outfile.name
@@ -197,7 +193,7 @@
         'point to a file.'
     with open(filename, 'rb') as in_f:
         start_of_file = in_f.read(3)
-        if start_of_file == '\x1f\x8b\x08':
+        if start_of_file == b'\x1f\x8b\x08':
             return True
         else:
             return False
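Several changes above and below come from Python 3's strict bytes/str split and its new literal syntax: `subprocess.check_output` now returns `bytes` (hence `.decode('utf-8')`), data read in `'rb'` mode only compares equal to `bytes` literals (hence the `b'\x1f\x8b\x08'` gzip magic number), and octal literals must be written `0o755` rather than `0755` (see the `export_results` hunk below). A toy illustration:

    import subprocess

    out = subprocess.check_output(['echo', 'hello'])
    assert out == b'hello\n'        # bytes, not str
    text = out.decode('utf-8')      # explicit decode to str

    with open('/bin/sh', 'rb') as fh:   # any binary file works for the demo
        magic = fh.read(2)
    assert isinstance(magic, bytes)     # compare only against b'...' literals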
@@ -355,8 +351,8 @@ def get_file_from_url(job, any_url, encryption_key=None, per_file_encryption=Tru
     url = any_url
     parsed_url = urlparse(any_url)
     try:
-        response = urllib2.urlopen(url)
-    except urllib2.HTTPError:
+        response = urllib.request.urlopen(url)
+    except urllib.error.HTTPError:
         if parsed_url.netloc.startswith(('s3', 'S3')):
             job.fileStore.logToMaster("Detected https link is for an encrypted s3 file.")
             return get_file_from_s3(job, any_url, encryption_key=encryption_key,
@@ -431,7 +427,7 @@ def export_results(job, fsid, file_name, univ_options, subfolder=None):
     # Handle Local
     try:
         # Create the directory if required
-        os.makedirs(output_folder, 0755)
+        os.makedirs(output_folder, 0o755)
     except OSError as err:
         if err.errno != errno.EEXIST:
             raise
@@ -455,7 +451,7 @@ def delete_fastqs(job, patient_dict):
 
     :param dict patient_dict: Dict of list of input fastqs
     """
-    for key in patient_dict.keys():
+    for key in list(patient_dict.keys()):
         if 'fastq' not in key:
             continue
         job.fileStore.logToMaster('Deleting "%s:%s" ' % (patient_dict['patient_id'], key) +
@@ -472,10 +468,10 @@ def delete_bams(job, bams, patient_id):
     :param dict bams: Dict of bam and bai files
     :param str patient_id: The ID of the patient for logging purposes.
     """
-    bams = {b: v for b, v in bams.items()
+    bams = {b: v for b, v in list(bams.items())
             if (b.endswith('.bam') or b.endswith('.bai')) and v is not None}
     if bams:
-        for key, val in bams.items():
+        for key, val in list(bams.items()):
             job.fileStore.logToMaster('Deleting "%s" for patient "%s".'
                                       % (key, patient_id))
             job.fileStore.deleteGlobalFile(val)
     elif 'rna_genome' in bams:
@@ -600,7 +596,11 @@ def canonical_chrom_sorted(in_chroms):
     if 'MT' in in_chroms:
         in_chroms[in_chroms.index('MT')] = 'M'
         mt = True
-    in_chroms = sorted(in_chroms, key=lambda c: int(c) if c not in ('X', 'Y', 'M') else c)
+    num_in_chroms = sorted(filter(str.isnumeric, in_chroms),
+                           key=lambda c: int(c))
+    chr_in_chroms = sorted(filter(str.isalpha, in_chroms))
+    in_chroms = num_in_chroms.copy()
+    in_chroms.extend(chr_in_chroms)
     try:
         m_index = in_chroms.index('M')
     except ValueError:
@@ -656,4 +656,4 @@ def dummy_job(job, return_value):
     :param return_value: Any object of any pickle-able type
     :return: return_value
     """
-    return return_value
\ No newline at end of file
+    return return_value
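The `canonical_chrom_sorted` rewrite above works around another Python 3 change: `sorted` can no longer compare `int` and `str` sort keys in one pass, so numeric and alphabetic chromosome names are sorted separately and concatenated. A toy run of the same idea (illustrative list; the function itself then moves 'M' to the end):

    chroms = ['10', 'X', '2', '1', 'M', 'Y']

    nums = sorted(filter(str.isnumeric, chroms), key=int)   # ['1', '2', '10']
    alphas = sorted(filter(str.isalpha, chroms))            # ['M', 'X', 'Y']
    print(nums + alphas)  # ['1', '2', '10', 'M', 'X', 'Y']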
diff --git a/src/protect/expression_profiling/__init__.py b/src/protect/expression_profiling/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/expression_profiling/__init__.py
+++ b/src/protect/expression_profiling/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/expression_profiling/rsem.py b/src/protect/expression_profiling/rsem.py
index cc44699b..91c329d1 100644
--- a/src/protect/expression_profiling/rsem.py
+++ b/src/protect/expression_profiling/rsem.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from math import ceil
 from protect.common import (docker_call,
@@ -75,10 +75,10 @@ def run_rsem(job, rna_bam, univ_options, rsem_options):
     input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
     input_files['rsem_index'] = untargz(input_files['rsem_index.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['--paired-end',
-                  '-p', str(rsem_options['n']),
+                  '-p', str(20),
                   '--bam', input_files['star_transcriptome.bam'],
                   '--no-bam-output',
diff --git a/src/protect/haplotyping/__init__.py b/src/protect/haplotyping/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/haplotyping/__init__.py
+++ b/src/protect/haplotyping/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/haplotyping/phlat.py b/src/protect/haplotyping/phlat.py
index 1d34c9c1..43e7bdf8 100644
--- a/src/protect/haplotyping/phlat.py
+++ b/src/protect/haplotyping/phlat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from math import ceil
@@ -51,6 +51,7 @@ def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
                    'input_2.fastq': fastqs[1],
                    'phlat_index.tar.gz': phlat_options['index']}
     input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
+    print(input_files)
     # Handle gzipped files
     gz = '.gz' if is_gzipfile(input_files['input_1.fastq']) else ''
     if gz:
@@ -59,7 +60,7 @@
             input_files[read_file + gz] = input_files[read_file] + gz
     # Untar the index
     input_files['phlat_index'] = untargz(input_files['phlat_index.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['-1', input_files['input_1.fastq' + gz],
                   '-2', input_files['input_2.fastq' + gz],
@@ -68,7 +69,7 @@
                   '-tag', sample_type,
                   '-e', '/home/phlat-1.0',  # Phlat directory home
                   '-o', '/data',  # Output directory
-                  '-p', str(phlat_options['n'])]  # Number of threads
+                  '-p', str(10)]  # Number of threads
     docker_call(tool='phlat', tool_parameters=parameters, work_dir=work_dir,
                 dockerhub=univ_options['dockerhub'], tool_version=phlat_options['version'])
     output_file = job.fileStore.writeGlobalFile(''.join([work_dir, '/', sample_type, '_HLA.sum']))
@@ -182,13 +183,13 @@ def most_probable_alleles(allele_list):
         except KeyError:
             all_alleles[allele] = [float(pvalue)]
     # If there are less than 2 alleles, report all
-    if len(all_alleles.keys()) <= 2:
-        return all_alleles.keys()
+    if len(list(all_alleles.keys())) <= 2:
+        return list(all_alleles.keys())
     # Else, get the two with most evidence. Evidence is gauged by
     # a) How many files (of the 3) thought that Allele was present
     # b) In a tie, who has a lower avg p value
     # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is
     # a measure of the avg because avg = sum / n and n is equal in both of them.
     else:
-        return sorted(all_alleles.keys(),
+        return sorted(list(all_alleles.keys()),
                       key=lambda x: (-len(all_alleles[x]), sum(all_alleles[x])))[0:2]
diff --git a/src/protect/mutation_annotation/__init__.py b/src/protect/mutation_annotation/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/mutation_annotation/__init__.py
+++ b/src/protect/mutation_annotation/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/mutation_annotation/snpeff.py b/src/protect/mutation_annotation/snpeff.py
index 43ed03c0..ab54dc24 100644
--- a/src/protect/mutation_annotation/snpeff.py
+++ b/src/protect/mutation_annotation/snpeff.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from math import ceil
 from protect.common import (docker_call,
@@ -45,7 +45,7 @@ def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options):
                    'snpeff_index.tar.gz': snpeff_options['index']}
     input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
     input_files['snpeff_index'] = untargz(input_files['snpeff_index.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['eff',
                   '-dataDir', input_files['snpeff_index'],
diff --git a/src/protect/mutation_calling/__init__.py b/src/protect/mutation_calling/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/mutation_calling/__init__.py
+++ b/src/protect/mutation_calling/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/mutation_calling/common.py b/src/protect/mutation_calling/common.py
index 947426d2..cac561d4 100644
--- a/src/protect/mutation_calling/common.py
+++ b/src/protect/mutation_calling/common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from protect.common import chrom_sorted, export_results, get_files_from_filestore, untargz
@@ -63,7 +63,7 @@ def run_mutation_aggregator(job, mutation_results, univ_options):
     """
     # Setup an input data structure for the merge function
     out = {}
-    for chrom in mutation_results['mutect'].keys():
+    for chrom in list(mutation_results['mutect'].keys()):
         out[chrom] = job.addChildJobFn(merge_perchrom_mutations, chrom, mutation_results,
                                        univ_options).rv()
     merged_snvs = job.addFollowOnJobFn(merge_perchrom_vcfs, out, 'merged', univ_options)
@@ -110,7 +110,7 @@ def merge_perchrom_mutations(job, chrom, mutations, univ_options):
     accepted_hits = defaultdict(dict)
 
-    for mut_type in vcf_processor.keys():
+    for mut_type in list(vcf_processor.keys()):
         # Get input files
         perchrom_mutations = {caller: vcf_processor[mut_type][caller](job, mutations[caller][chrom],
                                                                       work_dir, univ_options)
@@ -119,12 +119,12 @@
             perchrom_mutations['strelka'] = perchrom_mutations['strelka_' + mut_type]
             perchrom_mutations.pop('strelka_' + mut_type)
         # Read in each file to a dict
-        vcf_lists = {caller: read_vcf(vcf_file) for caller, vcf_file in perchrom_mutations.items()}
-        all_positions = list(set(itertools.chain(*vcf_lists.values())))
+        vcf_lists = {caller: read_vcf(vcf_file) for caller, vcf_file in list(perchrom_mutations.items())}
+        all_positions = list(set(itertools.chain(*list(vcf_lists.values()))))
         for position in sorted(all_positions):
-            hits = {caller: position in vcf_lists[caller] for caller in perchrom_mutations.keys()}
+            hits = {caller: position in vcf_lists[caller] for caller in list(perchrom_mutations.keys())}
             if sum(hits.values()) >= majority[mut_type]:
-                callers = ','.join([caller for caller, hit in hits.items() if hit])
+                callers = ','.join([caller for caller, hit in list(hits.items()) if hit])
                 assert position[1] not in accepted_hits[position[0]]
                 accepted_hits[position[0]][position[1]] = (position[2], position[3], callers)
@@ -133,7 +133,7 @@
     print('##INFO=')
@@ -297,7 +297,8 @@ def split_fusion_transcript(annotation_path, transcripts):
     forward = 'ACGTN'
     reverse = 'TGCAN'
-    trans = string.maketrans(forward, reverse)
+    # string.maketrans was removed; each type now has its own maketrans method
+    trans = str.maketrans(forward, reverse)
 
     # Pull in assembled transcript annotation
     five_pr_splits = collections.defaultdict(dict)
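`string.maketrans` is gone in Python 3; translation tables now come from the method on each type, here `str.maketrans`. A small reverse-complement sketch using the same table (the helper name is illustrative, not pipeline code):

    trans = str.maketrans('ACGTN', 'TGCAN')

    def reverse_complement(seq):
        # Complement each base via the table, then reverse the string
        return seq.translate(trans)[::-1]

    print(reverse_complement('ACCGT'))  # prints 'ACGGT'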
@@ -435,13 +436,13 @@ def reformat_star_fusion_output(job,
         score = 'Junction:%s-Spanning:%s' % (record.JunctionReadCount, record.SpanningFragCount)
 
         # Add empty sequences in case Trinity doesn't output one
-        if len(five_pr_splits[fusion].keys()) == 0:
+        if len(list(five_pr_splits[fusion].keys())) == 0:
             five_pr_splits[fusion]['N/A'] = '.'
-        if len(three_pr_splits[fusion].keys()) == 0:
+        if len(list(three_pr_splits[fusion].keys())) == 0:
             three_pr_splits[fusion]['N/A'] = '.'
-        for transcript_id in five_pr_splits[fusion].keys():
+        for transcript_id in list(five_pr_splits[fusion].keys()):
             five_prime_seq = five_pr_splits[fusion][transcript_id]
             three_prime_seq = three_pr_splits[fusion][transcript_id]
diff --git a/src/protect/mutation_calling/indel.py b/src/protect/mutation_calling/indel.py
index 7c8990da..fc05981a 100644
--- a/src/protect/mutation_calling/indel.py
+++ b/src/protect/mutation_calling/indel.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 
 
 def run_indel_caller(job, tumor_bam, normal_bam, univ_options, indel_options):
diff --git a/src/protect/mutation_calling/muse.py b/src/protect/mutation_calling/muse.py
index 28be1c6b..fc837067 100644
--- a/src/protect/mutation_calling/muse.py
+++ b/src/protect/mutation_calling/muse.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from collections import defaultdict
 from math import ceil
@@ -125,7 +125,7 @@ def run_muse_perchrom(job, tumor_bam, normal_bam, univ_options, muse_options, ch
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     output_prefix = os.path.join(work_dir, chrom)
@@ -162,9 +162,9 @@ def run_muse_sump_perchrom(job, muse_output, univ_options, muse_options, chrom):
     tbi = os.path.splitext(input_files['dbsnp_coding.vcf.gz.tbi.tmp'])[0]
     time.sleep(2)
     shutil.copy(input_files['dbsnp_coding.vcf.gz.tbi.tmp'], tbi)
-    os.chmod(tbi, 0777)
+    os.chmod(tbi, 0o777)
     open(tbi, 'a').close()
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     output_file = ''.join([work_dir, '/', chrom, '.vcf'])
     parameters = ['sump',
diff --git a/src/protect/mutation_calling/mutect.py b/src/protect/mutation_calling/mutect.py
index eb1a556f..9068b7b8 100644
--- a/src/protect/mutation_calling/mutect.py
+++ b/src/protect/mutation_calling/mutect.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from collections import defaultdict
 from math import ceil
@@ -105,7 +105,9 @@ def run_mutect_perchrom(job, tumor_bam, normal_bam, univ_options, mutect_options
     :return: fsID for the chromsome vcf
     :rtype: toil.fileStore.FileID
     """
+    work_dir = os.getcwd()
+
     input_files = {
         'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
         'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
@@ -119,13 +121,14 @@
         'dbsnp.vcf.gz': mutect_options['dbsnp_vcf'],
         'dbsnp.vcf.idx.tar.gz': mutect_options['dbsnp_idx']}
     input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
+    # dbsnp.vcf should be bgzipped, but all others should be tar.gz'd
     input_files['dbsnp.vcf'] = gunzip(input_files['dbsnp.vcf.gz'])
+    #input_files['dbsnp.vcf'] =
     for key in ('genome.fa', 'genome.fa.fai', 'genome.dict', 'cosmic.vcf', 'cosmic.vcf.idx',
                 'dbsnp.vcf.idx'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
-
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
     mutout = ''.join([work_dir, '/', chrom, '.out'])
     mutvcf = ''.join([work_dir, '/', chrom, '.vcf'])
     parameters = ['-R', input_files['genome.fa'],
diff --git a/src/protect/mutation_calling/radia.py b/src/protect/mutation_calling/radia.py
index 178bd8dd..9ba7b863 100644
--- a/src/protect/mutation_calling/radia.py
+++ b/src/protect/mutation_calling/radia.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from collections import defaultdict
 from math import ceil
@@ -87,7 +87,7 @@ def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
              +- 'chrM': fsID
     :rtype: dict
     """
-    if 'rna_genome' in rna_bam.keys():
+    if 'rna_genome' in list(rna_bam.keys()):
         rna_bam = rna_bam['rna_genome']
     elif set(rna_bam.keys()) == {'rna_genome_sorted.bam', 'rna_genome_sorted.bam.bai'}:
         pass
@@ -151,7 +151,7 @@ def run_radia_perchrom(job, bams, univ_options, radia_options, chrom):
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     radia_output = ''.join([work_dir, '/radia_', chrom, '.vcf'])
     radia_log = ''.join([work_dir, '/radia_', chrom, '_radia.log'])
@@ -214,7 +214,7 @@ def run_filter_radia(job, bams, radia_file, univ_options, radia_options, chrom):
     for key in ('cosmic_beds', 'dbsnp_beds', 'retrogene_beds', 'pseudogene_beds', 'gencode_beds'):
         input_files[key] = untargz(input_files[key], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     filterradia_log = ''.join([work_dir, '/radia_filtered_', chrom, '_radia.log'])
     parameters = [univ_options['patient'],  # shortID
diff --git a/src/protect/mutation_calling/somaticsniper.py b/src/protect/mutation_calling/somaticsniper.py
index ae148f04..42825439 100644
--- a/src/protect/mutation_calling/somaticsniper.py
+++ b/src/protect/mutation_calling/somaticsniper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from collections import defaultdict
 from math import ceil
@@ -146,7 +146,7 @@ def run_somaticsniper_full(job, tumor_bam, normal_bam, univ_options, somaticsnip
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     output_file = os.path.join(work_dir, 'somatic-sniper_full.vcf')
     parameters = ['-f', input_files['genome.fa'],
@@ -190,7 +190,7 @@ def filter_somaticsniper(job, tumor_bam, somaticsniper_output, tumor_pileup, uni
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     # Run snpfilter.pl
     parameters = ['snpfilter.pl',
@@ -278,7 +278,7 @@ def run_pileup(job, tumor_bam, univ_options, somaticsniper_options):
     for key in ('genome.fa', 'genome.fa.fai'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['pileup',
                   '-cvi',
diff --git a/src/protect/mutation_calling/strelka.py b/src/protect/mutation_calling/strelka.py
index 0a1a0e7b..172e4691 100644
--- a/src/protect/mutation_calling/strelka.py
+++ b/src/protect/mutation_calling/strelka.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from math import ceil
 from protect.common import (docker_call,
@@ -130,7 +130,7 @@ def run_strelka_full(job, tumor_bam, normal_bam, univ_options, strelka_options):
     for key in ('genome.fa', 'genome.fa.fai', 'config.ini'):
         input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = [input_files['config.ini'],
                   input_files['tumor.bam'],
diff --git a/src/protect/mutation_translation.py b/src/protect/mutation_translation.py
index 68e76221..21f3ee1e 100644
--- a/src/protect/mutation_translation.py
+++ b/src/protect/mutation_translation.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, print_function
+
 from collections import defaultdict
 from math import ceil
@@ -83,12 +83,12 @@ def run_transgene(job, snpeffed_file, rna_bam, univ_options, transgene_options,
     input_files['pepts.fa'] = untargz(input_files['pepts.fa.tar.gz'], work_dir)
     input_files['genome.fa'] = untargz(input_files['genome.fa.tar.gz'], work_dir)
     input_files['annotation.gtf'] = untargz(input_files['annotation.gtf.tar.gz'], work_dir)
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
 
     parameters = ['--peptides', input_files['pepts.fa'],
                   '--prefix', 'transgened',
                   '--pep_lens', '9,10,15',
-                  '--cores', str(transgene_options['n']),
+                  '--cores', str(20),
                   '--genome', input_files['genome.fa'],
                   '--annotation', input_files['annotation.gtf']]
@@ -107,7 +107,7 @@
         fusion_files = get_files_from_filestore(job, fusion_files, work_dir, docker=False)
         fusion_files['transcripts.fa'] = untargz(fusion_files['transcripts.fa.tar.gz'], work_dir)
-        fusion_files = {key: docker_path(path) for key, path in fusion_files.items()}
+        fusion_files = {key: docker_path(path) for key, path in list(fusion_files.items())}
 
         parameters += ['--transcripts', fusion_files['transcripts.fa'],
                        '--fusions', fusion_files['fusion_calls']]
diff --git a/src/protect/pipeline/ProTECT.py b/src/protect/pipeline/ProTECT.py
index 6b40fb79..c0ce828b 100644
--- a/src/protect/pipeline/ProTECT.py
+++ b/src/protect/pipeline/ProTECT.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -21,7 +21,7 @@
 Program info can be found in the docstring of the main function.
 Details can also be obtained by running the script with -h .
""" -from __future__ import print_function + from collections import defaultdict from multiprocessing import cpu_count @@ -160,7 +160,7 @@ def _add_default_entries(input_dict, defaults_dict): :return: updated dict :rtype: dict """ - for key, value in defaults_dict.iteritems(): + for key, value in defaults_dict.items(): if key == 'patients': print('Cannot default `patients`.') continue @@ -302,8 +302,8 @@ def parse_patients(job, patient_dict, skip_fusions=False): if f + '_fastq_2' not in output_dict: output_dict[f + '_fastq_2'] = get_fastq_2(job, patient_dict['patient_id'], f, output_dict[f + '_fastq_1']) - output_dict['gdc_inputs'] = [k for k, v in output_dict.items() if str(v).startswith('gdc')] - if not any('dna' in k for k in output_dict.keys()): + output_dict['gdc_inputs'] = [k for k, v in list(output_dict.items()) if str(v).startswith('gdc')] + if not any('dna' in k for k in list(output_dict.keys())): # There are no input DNA files so we cannot filter for oxog output_dict['filter_for_OxoG'] = False return output_dict @@ -354,7 +354,7 @@ def _parse_config_file(job, config_file, max_cores=None): # Flags to check for presence of encryption keys if required gdc_inputs = ssec_encrypted = False - for key in input_config.keys(): + for key in list(input_config.keys()): if key == 'patients': # Ensure each patient contains the required entries for sample_name in input_config[key]: @@ -410,7 +410,7 @@ def _parse_config_file(job, config_file, max_cores=None): if key == 'alignment': append_subgroup = ['post'] elif key == 'mutation_calling': - mutation_caller_list = input_config[key].keys() + mutation_caller_list = list(input_config[key].keys()) append_subgroup = [] else: append_subgroup = [] @@ -443,7 +443,7 @@ def parse_config_file(job, config_file, max_cores=None): sample_set, univ_options, processed_tool_inputs = _parse_config_file(job, config_file, max_cores) # Start a job for each sample in the sample set - for patient_id in sample_set.keys(): + for patient_id in list(sample_set.keys()): job.addFollowOnJobFn(launch_protect, sample_set[patient_id], univ_options, processed_tool_inputs) return None @@ -513,7 +513,7 @@ def launch_protect(job, patient_data, univ_options, tool_options): haplotype_patient = job.wrapJobFn(get_patient_mhc_haplotype, sample_prep.rv()) sample_prep.addChild(haplotype_patient) else: - assert None not in fastq_files.values() + assert None not in list(fastq_files.values()) # We are guaranteed to have fastqs here for sample_type in 'tumor_dna', 'normal_dna', 'tumor_rna': phlat_files[sample_type] = job.wrapJobFn( @@ -633,7 +633,7 @@ def launch_protect(job, patient_data, univ_options, tool_options): # Fusions have been handled above, and we don't need to align DNA get_mutations = None else: - assert (None, None) not in zip(fastq_files.values(), bam_files.values()) + assert (None, None) not in list(zip(list(fastq_files.values()), list(bam_files.values()))) for sample_type in 'tumor_dna', 'normal_dna': if bam_files[sample_type] is None: assert fastq_files[sample_type] is not None @@ -677,7 +677,7 @@ def launch_protect(job, patient_data, univ_options, tool_options): bam_files[sample_type].addChild(mutations[caller]) bam_files['tumor_rna'].addChild(mutations['radia']) get_mutations = job.wrapJobFn(run_mutation_aggregator, - {caller: cjob.rv() for caller, cjob in mutations.items()}, + {caller: cjob.rv() for caller, cjob in list(mutations.items())}, univ_options, disk='100M', memory='100M', cores=1).encapsulate() for caller in mutations: @@ -687,7 +687,6 @@ def launch_protect(job, 
@@ -687,7 +687,6 @@ def launch_protect(job, patient_data, univ_options, tool_options):
             # We may need the tumor one depending on OxoG
             if not patient_data['filter_for_OxoG']:
                 get_mutations.addChild(delete_bam_files['tumor_dna'])
-
     if get_mutations is not None:
         snpeff = job.wrapJobFn(run_snpeff, get_mutations.rv(), univ_options, tool_options['snpeff'],
                                disk=PromisedRequirement(snpeff_disk,
@@ -714,18 +713,15 @@
         transgene.addChild(delete_bam_files['tumor_dna'])
     if fusions:
         fusions.addChild(transgene)
-
     spawn_mhc = job.wrapJobFn(spawn_antigen_predictors, transgene.rv(), haplotype_patient.rv(),
                               univ_options, (tool_options['mhci'], tool_options['mhcii']),
                               disk='100M', memory='100M', cores=1).encapsulate()
     haplotype_patient.addChild(spawn_mhc)
     transgene.addChild(spawn_mhc)
-
     merge_mhc = job.wrapJobFn(merge_mhc_peptide_calls, spawn_mhc.rv(), transgene.rv(), univ_options,
                               disk='100M', memory='100M', cores=1)
     spawn_mhc.addFollowOn(merge_mhc)
     transgene.addChild(merge_mhc)
-
     rankboost = job.wrapJobFn(wrap_rankboost, rsem.rv(), merge_mhc.rv(), transgene.rv(),
                               univ_options, tool_options['rankboost'], disk='100M', memory='100M',
                               cores=1)
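For orientation, `launch_protect` above wires the pipeline together as a Toil job graph: `wrapJobFn` wraps a plain function into a job, `addChild`/`addFollowOn` impose ordering, and `rv()` is a promise for a return value resolved at run time. A minimal, self-contained sketch of the same pattern (toy functions; the jobStore path is an assumption):

    from toil.common import Toil
    from toil.job import Job

    def parent(job):
        child = job.addChildJobFn(produce)
        # child.rv() is a promise; consume() sees the real value when it runs
        job.addFollowOnJobFn(consume, child.rv())

    def produce(job):
        return 42

    def consume(job, value):
        job.fileStore.logToMaster('got %s' % value)

    if __name__ == '__main__':
        options = Job.Runner.getDefaultOptions('./toy_jobStore')  # assumed local job store
        with Toil(options) as toil:
            toil.start(Job.wrapJobFn(parent))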
diff --git a/src/protect/pipeline/__init__.py b/src/protect/pipeline/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/pipeline/__init__.py
+++ b/src/protect/pipeline/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/qc/__init__.py b/src/protect/qc/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/qc/__init__.py
+++ b/src/protect/qc/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/qc/rna.py b/src/protect/qc/rna.py
index 42334920..37b8ad99 100644
--- a/src/protect/qc/rna.py
+++ b/src/protect/qc/rna.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
+
 from math import ceil
 from protect.common import docker_call, docker_path, get_files_from_filestore, is_gzipfile
@@ -47,7 +47,7 @@ def run_cutadapt(job, fastqs, univ_options, cutadapt_options):
         for read_file in 'rna_1.fastq', 'rna_2.fastq':
             os.symlink(read_file, read_file + gz)
             input_files[read_file + gz] = input_files[read_file] + gz
-    input_files = {key: docker_path(path) for key, path in input_files.items()}
+    input_files = {key: docker_path(path) for key, path in list(input_files.items())}
     parameters = ['-a', cutadapt_options['a'],  # Fwd read 3' adapter
                   '-A', cutadapt_options['A'],  # Rev read 3' adapter
                   '-m', '35',  # Minimum size of read
diff --git a/src/protect/rankboost.py b/src/protect/rankboost.py
index bd9bf514..2c0b0a53 100644
--- a/src/protect/rankboost.py
+++ b/src/protect/rankboost.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
diff --git a/src/protect/test/__init__.py b/src/protect/test/__init__.py
index 382c581b..219056ee 100644
--- a/src/protect/test/__init__.py
+++ b/src/protect/test/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -16,7 +16,7 @@
 
 # A lot of this code was taken from toil/test/src/__init__.py
 
-from __future__ import absolute_import
+
 import logging
 import os
 import tempfile
diff --git a/src/protect/test/ci/__init__.py b/src/protect/test/ci/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/test/ci/__init__.py
+++ b/src/protect/test/ci/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/test/ci/test_protect.py b/src/protect/test/ci/test_protect.py
index 2fb3eae8..906160dc 100644
--- a/src/protect/test/ci/test_protect.py
+++ b/src/protect/test/ci/test_protect.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_reporting.py
 """
-from __future__ import print_function
+
 from protect.test import ProtectTest
@@ -173,8 +173,8 @@ def _test_ran_successfully(self, expected_dirs):
         expected_contents = {}
         for dir in expected_dirs:
             if isinstance(dir, dict):
-                assert len(dir.keys()) == 1
-                if dir.keys()[0] == 'mutations':
+                assert len(list(dir.keys())) == 1
+                if list(dir.keys())[0] == 'mutations':
                     expected_contents['mutations'] = ('/mnt/ephemeral/done/TEST/mutations',
                                                       sorted(dir['mutations']),
                                                       [])
@@ -198,7 +198,7 @@
                     else:
                         expected_contents['mutations_' + caller] = \
                             contents_per_dir['mutations'][caller]
-                elif dir.keys()[0] == 'alignments':
+                elif list(dir.keys())[0] == 'alignments':
                     alignment_files = []
                     for tissue_type in dir['alignments']:
                         alignment_files.extend(contents_per_dir['alignments'][tissue_type])
@@ -211,7 +211,7 @@
                 expected_contents[dir] = contents_per_dir[dir]
 
         expected_outputs = [('/mnt/ephemeral/done/TEST',
-                             sorted([x for x in expected_contents.keys()
+                             sorted([x for x in list(expected_contents.keys())
                                      if not x.startswith('mutations_')]),
                              [])]
         expected_outputs.extend([expected_contents[d] for d in sorted(expected_contents.keys())])
diff --git a/src/protect/test/unit/__init__.py b/src/protect/test/unit/__init__.py
index ba9b9130..dabfb0af 100644
--- a/src/protect/test/unit/__init__.py
+++ b/src/protect/test/unit/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -13,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
+
diff --git a/src/protect/test/unit/test_alignments.py b/src/protect/test/unit/test_alignments.py
index a8d2d3a8..0ea387eb 100644
--- a/src/protect/test/unit/test_alignments.py
+++ b/src/protect/test/unit/test_alignments.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_file_downloads.py
 """
-from __future__ import print_function
+
 import os
 import subprocess
@@ -75,7 +75,7 @@ def _get_test_bwa_files(job):
 
     :return: FSID for the rsem file
     """
-    base_call = 's3am download s3://cgl-pipeline-inputs/protect/ci_references/'
+    base_call = 's3am download S3://protect-data/hg38_references/'
     subprocess.check_call((base_call + 'Tum_1.fq.gz Tum_1.fq.gz').split(' '))
     subprocess.check_call((base_call + 'Tum_2.fq.gz Tum_2.fq.gz').split(' '))
     return [job.fileStore.writeGlobalFile('Tum_1.fq.gz'),
@@ -105,7 +105,7 @@ def _get_test_star_files(job):
 
     :return: FSID for the rsem file
     """
-    base_call = 's3am download s3://cgl-pipeline-inputs/protect/ci_references/'
+    base_call = 's3am download S3://protect-data/hg38_references/'
     subprocess.check_call((base_call + 'Rna_1.fq.gz Rna_1.fq.gz').split(' '))
     subprocess.check_call((base_call + 'Rna_2.fq.gz Rna_2.fq.gz').split(' '))
     return [job.fileStore.writeGlobalFile('Rna_1.fq.gz'),
diff --git a/src/protect/test/unit/test_file_downloads.py b/src/protect/test/unit/test_file_downloads.py
index 35994668..39f115a2 100644
--- a/src/protect/test/unit/test_file_downloads.py
+++ b/src/protect/test/unit/test_file_downloads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_file_downloads.py
 """
-from __future__ import print_function
+
 from protect.common import get_file_from_s3
 from protect.common import get_file_from_url
diff --git a/src/protect/test/unit/test_mutation_callers.py b/src/protect/test/unit/test_mutation_callers.py
index efa5d3e7..c30702da 100644
--- a/src/protect/test/unit/test_mutation_callers.py
+++ b/src/protect/test/unit/test_mutation_callers.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_file_downloads.py
 """
-from __future__ import print_function
+
 from protect.common import untargz
 from protect.mutation_calling.muse import run_muse
 from protect.mutation_calling.mutect import run_mutect
@@ -101,7 +101,7 @@ def test_star_fusion(self):
 def _get_fusion_options(job):
     star_fusion_options = {}
     fusion_inspector_options = {}
-    call = 's3am download S3://cgl-pipeline-inputs/protect/ci_references/ci_star_fusion_compatible_index.tar.gz ./index.tar.gz'
+    call = 's3am download S3://protect-data/hg38_references/ci_star_fusion_compatible_index.tar.gz ./index.tar.gz'
     subprocess.check_call(call.split(' '))
     star_fusion_options['index'] = fusion_inspector_options['index'] = job.fileStore.writeGlobalFile('index.tar.gz')
     # Trinity now sets a minimum for the number of reads, so don't run it
@@ -192,7 +192,7 @@ def _get_test_fusion_reads(job):
 
     :return: FSID for each paired FASTQ
     """
-    base_call = 's3am download S3://cgl-pipeline-inputs/protect/ci_references/'
+    base_call = 's3am download S3://protect-data/hg38_references/'
     samples = ['RNA_CD74_ROS1_1.fq.gz', 'RNA_CD74_ROS1_2.fq.gz']
     for sample in samples:
         call = '{base}{sample} ./{sample}'.format(base=base_call, sample=sample)
@@ -209,7 +209,7 @@ def _get_test_fusion_junction(job):
 
     :return: FSID for each paired FASTQ
     """
-    base_call = 's3am download S3://cgl-pipeline-inputs/protect/ci_references/'
+    base_call = 's3am download S3://protect-data/hg38_references/'
     sample = 'CD74_ROS1_Chimeric.out.junction'
     call = '{base}{sample} ./ChimericJunction'.format(base=base_call, sample=sample)
     subprocess.check_call(call.split(' '))
diff --git a/src/protect/test/unit/test_rankboost.py b/src/protect/test/unit/test_rankboost.py
index 0c271f19..442ae22b 100644
--- a/src/protect/test/unit/test_rankboost.py
+++ b/src/protect/test/unit/test_rankboost.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_rankboost.py
 """
-from __future__ import print_function
+
 from protect.pipeline.ProTECT import _parse_config_file
 from protect.rankboost import wrap_rankboost
diff --git a/src/protect/test/unit/test_reporting.py b/src/protect/test/unit/test_reporting.py
index 9c17fc91..00572421 100644
--- a/src/protect/test/unit/test_reporting.py
+++ b/src/protect/test/unit/test_reporting.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_reporting.py
 """
-from __future__ import print_function
+
 import os
diff --git a/src/protect/test/unit/test_snpeff.py b/src/protect/test/unit/test_snpeff.py
index 39381c55..c016123d 100644
--- a/src/protect/test/unit/test_snpeff.py
+++ b/src/protect/test/unit/test_snpeff.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_snpeff.py
 """
-from __future__ import print_function
+
 from protect.mutation_annotation.snpeff import run_snpeff
 from protect.pipeline.ProTECT import _parse_config_file
diff --git a/src/protect/test/unit/test_spawn_antigen_predictors.py b/src/protect/test/unit/test_spawn_antigen_predictors.py
index f00bb393..db3bfa5c 100644
--- a/src/protect/test/unit/test_spawn_antigen_predictors.py
+++ b/src/protect/test/unit/test_spawn_antigen_predictors.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
@@ -19,7 +19,7 @@
 Affiliation : UCSC BME, UCSC Genomics Institute
 File : protect/test/test_spawn_antigen_predictors.py
 """
-from __future__ import print_function
+
 from protect.binding_prediction.common import spawn_antigen_predictors, merge_mhc_peptide_calls
 from protect.pipeline.ProTECT import _parse_config_file
diff --git a/src/protect/test/unit/test_transgene b/src/protect/test/unit/test_transgene
index 8adfeaec..5a7ac91e 100644
--- a/src/protect/test/unit/test_transgene
+++ b/src/protect/test/unit/test_transgene
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #
diff --git a/src/protect/version.py b/src/protect/version.py
index f0db84aa..b42ec478 100644
--- a/src/protect/version.py
+++ b/src/protect/version.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/env python3
 # Copyright 2016 UCSC Computational Genomics Lab
 # Original contributor: Arjun Arkal Rao
 #