diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5a3c400..3a7d0e6 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,7 +3,9 @@ image: openjdk:8u292-jre-buster before_script: - java -version - - apt-get update && apt-get --assume-yes install wget make procps + - apt-get update && apt-get --assume-yes install wget make procps software-properties-common + #- apt-get --assume-yes install python3 python3-pip + #- pip3 install biopython==1.76 - wget -qO- https://get.nextflow.io | bash && cp nextflow /usr/local/bin/nextflow - nextflow help - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh diff --git a/Makefile b/Makefile index 8489039..51b0820 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ clean: test: nextflow main.nf --help + nextflow main.nf -profile test,conda --initialize nextflow main.nf -profile test,conda --name ERR4145453 \ --output output/test1 \ --fastq1 test_data/ERR4145453_1.fastq.gz \ @@ -20,6 +21,10 @@ test: --fastq1 test_data/ERR4145453_1.fastq.gz \ --fastq2 test_data/ERR4145453_2.fastq.gz \ --keep_intermediate + nextflow main.nf -profile test,conda --name hCoV-19_NTXX \ + --output output/test4 \ + --fasta test_data/hCoV-19_NTXX.fasta + #python3 -m unittest bin/test_assembly_variant_caller.py check: test -s output/test1/ERR4145453/ERR4145453.bcftools.normalized.annotated.vcf.gz || { echo "Missing test 1 VCF output file!"; exit 1; } @@ -38,5 +43,6 @@ check: test -s output/test3/ERR4145453/ERR4145453.gatk.normalized.annotated.vcf.gz || { echo "Missing test 3 VCF output file!"; exit 1; } test -s output/test3/ERR4145453/ERR4145453.lofreq.normalized.annotated.vcf.gz || { echo "Missing test 3 VCF output file!"; exit 1; } test -s output/test3/ERR4145453/ERR4145453.ivar.tsv || { echo "Missing test 3 VCF output file!"; exit 1; } - test -s output/test3/ERR4145453/ERR4145453.fastp_stats.json || { echo "Missing test 2 VCF output file!"; exit 1; } - test -s output/test3/ERR4145453/ERR4145453.fastp_stats.html || { echo "Missing 
test 2 VCF output file!"; exit 1; } \ No newline at end of file + test -s output/test3/ERR4145453/ERR4145453.fastp_stats.json || { echo "Missing test 3 VCF output file!"; exit 1; } + test -s output/test3/ERR4145453/ERR4145453.fastp_stats.html || { echo "Missing test 3 VCF output file!"; exit 1; } + test -s output/test4/hCoV-19_NTXX/hCoV-19_NTXX.assembly.normalized.annotated.vcf.gz || { echo "Missing test 4 VCF output file!"; exit 1; } \ No newline at end of file diff --git a/README.md b/README.md index 6bce6c9..d1bfa15 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ -# Covigator NGS pipeline +# Covigator pipeline [![DOI](https://zenodo.org/badge/374669617.svg)](https://zenodo.org/badge/latestdoi/374669617) -The Covigator NGS pipeline process SARS-CoV-2 FASTQ files into analysis ready VCF files. The pipeline is implemented in the Nextflow framework (Di Tommaso, 2017). +The Covigator pipeline process SARS-CoV-2 FASTQ or FASTA files into annotated and normalized analysis ready VCF files. +The pipeline is implemented in the Nextflow framework (Di Tommaso, 2017). -The pipeline includes the following steps: +When FASTQ files are provided the pipeline includes the following steps: - **Trimming**. `fastp` is used to trim reads with default values. - **Alignment**. `BWA mem` is used for the alignment of single or paired end samples. - **BAM preprocessing**. BAM files are prepared and duplicate reads are marked using GATK and Picard tools. @@ -13,7 +14,20 @@ The pipeline includes the following steps: - **Variant normalization**. `bcftools norm` and `vt` tools are employed to left align indels, trim variant calls and remove variant duplicates. - **Variant consequence annotation**. `SnpEff` is employed to annotate the variant consequences of variants. -The alignment, BAM preprocessing and variant normalization pipelines were implemented in additional Nextflow pipelines within the TronFlow initiative. +Both single end and paired end FASTQ files are supported. 
+ +When a FASTA file is provided with a single assembly sequence the pipeline includes the following steps: +- **Variant calling**. A global pairwise alignment (Biopython `PairwiseAligner` in `global` mode) is performed against the reference sequence to call SNVs and + indels. Indels longer than 50 bp and indels at the beginning or end of the assembly sequence are excluded. Any mutation where + either the reference or the assembly contains an N is excluded. +- **Variant normalization**. `bcftools norm` and `vt` tools are employed to left align indels, trim variant calls and remove variant duplicates. +- **Variant consequence annotation**. `SnpEff` is employed to annotate the variant consequences of variants. + +The FASTA file is expected to contain a single assembly sequence. +Bear in mind that only clonal variants can be called on the assembly. + +The alignment, BAM preprocessing and variant normalization pipelines were implemented in additional Nextflow pipelines +within the TronFlow initiative. The full details are available in their respective repositories: - https://github.com/TRON-Bioinformatics/tronflow-bwa (https://doi.org/10.5281/zenodo.4722852) - https://github.com/TRON-Bioinformatics/tronflow-bam-preprocessing (https://doi.org/10.5281/zenodo.4810918) @@ -44,6 +58,15 @@ variants with a VAF < 20 % are considered `LOW_FREQUENCY` and variants with a VA ## How to run it +If you are going to use it with the conda environments, first initialize the environments by running: +``` +nextflow main.nf -profile conda --initialize +``` + +This will create the necessary conda environments under `work/conda`. +This initialization is required under every work folder when using more than one variant caller.
+ +Then run the application as follows: ``` $ nextflow run tron-bioinformatics/covigator-ngs-pipeline -profile conda --help @@ -51,10 +74,11 @@ Usage: nextflow run tron-bioinformatics/covigator-ngs-pipeline -profile conda --help Input: - * --fastq1: the first input FASTQ file + * --fastq1: the first input FASTQ file (not compatible with --fasta) + * --fasta: the FASTA file containing the assembly sequence (not compatible with --fastq1) * --name: the sample name, output files will be named after this name * --reference: the reference genome FASTA file, *.fai, *.dict and bwa indexes are required. - * --gff: the GFFv3 gene annotations file + * --gff: the GFFv3 gene annotations file (only optional with --fasta) * --output: the folder where to publish output Optional input: @@ -63,12 +87,17 @@ Optional input: * --min_mapping_quality: minimum mapping quality to take a read into account (default: 20) * --low_frequency_variant_threshold: VAF threshold to mark a variant as low frequency (default: 0.2) * --subclonal_variant_threshold: VAF superior threshold to mark a variant as subclonal (default: 0.8) - * --strand_bias_threshold: threshold for the strand bias test Phred score (default: 20) * --memory: the ammount of memory used by each job (default: 3g) * --cpus: the number of CPUs used by each job (default: 1) + * --initialize: start the initialization of the conda environments + * --skip_lofreq: skips calling variants with LoFreq + * --skip_gatk: skips calling variants with GATK + * --skip_bcftools: skips calling variants with BCFTools + * --skip_ivar: skips calling variants with iVar Output: - * Output a normalized, phased and annotated VCF file for each of BCFtools, GATK and LoFreq + * Output a normalized, phased and annotated VCF file for each of BCFtools, GATK and LoFreq when FASTQ files are + provided or a single VCF obtained from a global alignment when a FASTA file is provided * Output a TSV file output from iVar ``` @@ -95,9 +124,9 @@ A workaround to this 
situation is to clone the tronflow dependencies and let cov
 For instance:
 ```
 cd /covigator/dependencies
-git clone --branch v1.4.0 https://github.com/TRON-Bioinformatics/tronflow-bwa.git
+git clone --branch v1.4.1 https://github.com/TRON-Bioinformatics/tronflow-bwa.git
 git clone --branch v1.5.0 https://github.com/TRON-Bioinformatics/tronflow-bam-preprocessing.git
-git clone --branch v1.1.0 https://github.com/TRON-Bioinformatics/tronflow-variant-normalization.git
+git clone --branch v1.1.1 https://github.com/TRON-Bioinformatics/tronflow-variant-normalization.git
 ```
 
 And then use the following parameters:
diff --git a/bin/assembly_variant_caller.py b/bin/assembly_variant_caller.py
new file mode 100755
index 0000000..36c42d1
--- /dev/null
+++ b/bin/assembly_variant_caller.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python
+import os
+from argparse import ArgumentParser
+from dataclasses import dataclass
+from Bio import Align, SeqIO
+from Bio.Align import PairwiseAlignment
+from typing import List
+
+CHROMOSOME = "MN908947.3"
+
+
+@dataclass
+class Variant:
+    """A variant call on CHROMOSOME, the position is stored 0-based."""
+    position: int
+    reference: str
+    alternate: str
+
+    def to_vcf_line(self):
+        """Returns the CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO columns of the VCF record as a tuple."""
+        # transform 0-based position to 1-based position
+        return CHROMOSOME, str(self.position + 1), ".", self.reference, self.alternate, ".", "PASS", "."
+
+
+class AssemblyVariantCaller:
+    """Calls SNVs and indels of up to 50 bp from a global pairwise alignment of an assembly against a reference."""
+
+    def call_variants(self, sequence: str, reference: str) -> List[Variant]:
+        """Aligns the sequence against the reference and returns the variants found in the first alignment."""
+        alignment = self._run_alignment(sequence=sequence, reference=reference)
+        variants = self._call_mutations(alignment)
+        return variants
+
+    def _run_alignment(self, sequence: str, reference: str) -> PairwiseAlignment:
+        """Returns the first alignment reported, the reference is the target and the sequence is the query."""
+        aligner = Align.PairwiseAligner()
+        aligner.mode = 'global'
+        aligner.match = 2
+        aligner.mismatch = -1
+        aligner.open_gap_score = -3
+        aligner.extend_gap_score = -0.1
+        # gaps at the ends of either sequence are scored 0, ie: they are not penalized
+        aligner.target_end_gap_score = 0.0
+        aligner.query_end_gap_score = 0.0
+        alignments = aligner.align(reference, sequence)
+        return alignments[0]
+
+    def _call_mutations(self, alignment: PairwiseAlignment) -> List[Variant]:
+        """Walks the aligned blocks calling indels between blocks and SNVs within blocks, positions are 0-based."""
+        # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
+        # MN908947.3 9924 . C T 228 .
+        # DP=139;VDB=0.784386;SGB=-0.693147;RPB=0.696296;MQB=1;MQSB=1;BQB=0.740741;MQ0F=0;AC=1;AN=1;DP4=2,0,123,12;MQ=60
+        # GT:PL 1:255,0
+        alternate = alignment.query
+        reference = alignment.target
+
+        variants = []
+        prev_ref_end = None
+        prev_alt_end = None
+        for (ref_start, ref_end), (alt_start, alt_end) in zip(alignment.aligned[0], alignment.aligned[1]):
+            # calls indels
+            # NOTE: it does not call indels at beginning and end of sequence
+            # NOTE(review): if both sequences have unaligned bases between two blocks only the deletion is called
+            if prev_ref_end is not None and prev_ref_end != ref_start:
+                # deletion
+                if ref_start - prev_ref_end <= 50:  # skips deletions longer than 50 bp
+                    ref = reference[prev_ref_end - 1: ref_start]
+                    if 'N' not in ref:  # do not call deletions with Ns
+                        variants.append(Variant(
+                            position=prev_ref_end - 1,
+                            reference=ref,
+                            alternate=reference[prev_ref_end - 1]))
+            elif prev_ref_end is not None and prev_alt_end != alt_start:
+                # insertion
+                if alt_start - prev_alt_end <= 50:  # skips insertions longer than 50 bp
+                    ref = reference[prev_ref_end - 1]
+                    alt = alternate[prev_alt_end:alt_start]
+                    if ref != 'N' and 'N' not in alt:  # do not call insertions with Ns
+                        variants.append(Variant(
+                            position=prev_ref_end - 1,
+                            reference=ref,
+                            alternate=ref + alt))
+
+            # calls SNVs
+            for pos, ref, alt in zip(
+                    range(ref_start, ref_end), reference[ref_start: ref_end], alternate[alt_start: alt_end]):
+                # contiguous SNVs are reported separately
+                if ref != alt and ref != 'N' and alt != 'N':  # do not call SNVs on Ns
+                    variants.append(Variant(position=pos, reference=ref, alternate=alt))
+
+            prev_ref_end = ref_end
+            prev_alt_end = alt_end
+
+        return variants
+
+
+def write_vcf(mutations, output_vcf):
+    """Writes the variants into a VCF file with a minimal header and no sample columns."""
+    with open(output_vcf, "w") as vcf_out:
+        header = (
+            "##fileformat=VCFv4.0",
+            "##FILTER=<ID=PASS,Description=\"All filters passed\">",
+            "##contig=<ID={}>".format(CHROMOSOME),
+            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
+        )
+        for row in header:
+            vcf_out.write(row + "\n")
+        for row in mutations:
+            vcf_out.write("\t".join(row.to_vcf_line()) + "\n")
+
+
+def _read_first_sequence(fasta):
+    """Returns the first record of the FASTA file, any further record is ignored. Fails if there is no record."""
+    record = next(SeqIO.parse(fasta, "fasta"), None)
+    if record is None:
+        raise ValueError("Fasta file {} does not contain any sequence!".format(fasta))
+    return record
+
+
+def main():
+    """Command line entry point: calls variants on the assembly in --fasta and writes them into --output-vcf."""
+    parser = ArgumentParser(description="Run Pipeline for testing")
+    parser.add_argument("--fasta", dest="fasta",
+                        help="The fasta file with the query sequence. Only one sequence is expected",
+                        required=True)
+    parser.add_argument("--reference", dest="reference",
+                        help="The fasta file with the reference sequence. Only one sequence is expected",
+                        required=True)
+    parser.add_argument("--output-vcf", dest="output_vcf",
+                        help="The path to the output VCF",
+                        required=True)
+    args = parser.parse_args()
+
+    # explicit checks instead of assert, assertions are skipped when Python runs with -O
+    for fasta in (args.fasta, args.reference):
+        if not os.path.exists(fasta):
+            parser.error("Fasta file {} does not exist!".format(fasta))
+
+    query = _read_first_sequence(args.fasta)
+    reference = _read_first_sequence(args.reference)
+    variant_caller = AssemblyVariantCaller()
+    variants = variant_caller.call_variants(sequence=str(query.seq), reference=str(reference.seq))
+    write_vcf(mutations=variants, output_vcf=args.output_vcf)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/bin/test_assembly_variant_caller.py b/bin/test_assembly_variant_caller.py
new file mode 100644
index 0000000..4cb1b12
--- /dev/null
+++ b/bin/test_assembly_variant_caller.py
@@ -0,0 +1,51 @@
+from unittest import TestCase
+
+from .assembly_variant_caller import AssemblyVariantCaller
+
+
+class TestAssemblyVariantCaller(TestCase):
+
+    def test_assembly_variant_caller(self):
+        caller = AssemblyVariantCaller()
+        # no mutations
+        variants = caller.call_variants(sequence="ACGTACGT", reference="ACGTACGT")
+        self.assertEqual(len(variants), 0)
+        # SNV
+        variants = caller.call_variants(sequence="ACGTCCGT", reference="ACGTACGT")
+        self.assertEqual(len(variants), 1)
+        snv = variants[0]
+        self.assertEqual(snv.reference, "A")
+        self.assertEqual(snv.alternate, "C")
+        self.assertEqual(snv.position, 4)
+        # deletion
+        variants = caller.call_variants(
+            reference="CTGGTGTGAGCCTGGTCACCAGGGTGGTAGGACAGACCCTCCTCTGGAGGCAAAGTGACG",
+            sequence="CTGGTGTGAGCCTGGTCACCAGGGTGGTAGGACAGACCCTCCTCTGGCAAAGTGACG")
+        self.assertEqual(len(variants), 1)
+        snv = variants[0]
+        self.assertEqual(snv.reference, "TGGA")
+        self.assertEqual(snv.alternate, "T")
+        self.assertEqual(snv.position, 44)
+        # insertion
+        variants = caller.call_variants(
+            sequence=
"CTGGTGTGAGCCTGGTCACCAGGGTGGTAGGACAGACCCTCCTCTGCCCGAGGCAAAGTGACG", + reference="CTGGTGTGAGCCTGGTCACCAGGGTGGTAGGACAGACCCTCCTCTGGAGGCAAAGTGACG") + self.assertEqual(len(variants), 1) + snv = variants[0] + self.assertEqual(snv.reference, "G") + self.assertEqual(snv.alternate, "GCCC") + self.assertEqual(snv.position, 45) + # another insertion + variants = caller.call_variants( + sequence= "CTGGTGTGAGTCCTGGTCACCAGGGTGGTAGGACAGACCCTCCTCTGCCCGAGGCAAAGTGACG", + reference="CTGGTGTGAGCCTGGTCACCAGGGTGGTAGGACAGACCCTCCTCTGGAGGCAAAGTGACG") + self.assertEqual(len(variants), 2) + snv = variants[1] + self.assertEqual(snv.reference, "G") + self.assertEqual(snv.alternate, "GCCC") + self.assertEqual(snv.position, 45) + snv = variants[0] + self.assertEqual(snv.reference, "G") + self.assertEqual(snv.alternate, "GT") + self.assertEqual(snv.position, 9) + diff --git a/environment.yml b/environment.yml index 4fd33ba..9d0e4ea 100644 --- a/environment.yml +++ b/environment.yml @@ -1,11 +1,13 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: covigator-ngs-pipeline +name: covigator-pipeline channels: - conda-forge - bioconda - defaults dependencies: + - conda-forge::python=3.8.5 + - conda-forge::biopython=1.76 - bioconda::nextflow=21.04.0 - bioconda::bcftools=1.12 - bioconda::lofreq=2.1.5 diff --git a/main.nf b/main.nf index 9333068..0471e9e 100755 --- a/main.nf +++ b/main.nf @@ -1,9 +1,25 @@ #!/usr/bin/env nextflow params.help= false -params.fastq1 = false +params.initialize = false +if (params.initialize) { + params.fastq1 = "$baseDir/test_data/ERR4145453_1.fastq.gz" + params.skip_bcftools = true + params.skip_ivar = true + params.skip_gatk = true + params.name = "init" +} +else { + params.fastq1 = false + params.skip_ivar = false + params.skip_bcftools = false + params.skip_gatk = false + params.name = false +} + +params.skip_lofreq = false +params.fasta = false params.fastq2 = false -params.name = false params.reference 
= false params.gff = false params.output = "." @@ -33,7 +49,7 @@ else { reference = file(params.reference) } -if (!params.gff) { +if (!params.gff && !params.fasta) { log.error "--gff is required" exit 1 } @@ -46,8 +62,17 @@ if (!params.name) { exit 1 } -if (!params.fastq1) { - log.error "--fastq1 is required" +if (!params.fastq1 && !params.fasta) { + log.error "either --fastq1 or --fasta are required" + exit 1 +} +else if (params.fastq1 && params.fasta) { + log.error "provide only --fastq1 or --fasta" + exit 1 +} + +if (params.skip_bcftools && params.skip_gatk && params.skip_ivar && params.skip_lofreq) { + log.error "enable at least one variant caller" exit 1 } @@ -56,284 +81,324 @@ if (!params.fastq2) { library = "single" } -if (library == "paired") { - - process readTrimmingPairedEnd { - cpus params.cpus - memory params.memory - tag params.name - publishDir "${params.output}/${params.name}", mode: "copy", pattern: "*fastp_stats*" - - input: - val name from params.name - file fastq1 from file(params.fastq1) - file fastq2 from file(params.fastq2) - - output: - set name, file("${fastq1.baseName}.trimmed.fq.gz"), file("${fastq2.baseName}.trimmed.fq.gz") into trimmed_fastqs - file("${name}.fastp_stats.json") - file("${name}.fastp_stats.html") - - """ - # --input_files needs to be forced, otherwise it is inherited from profile in tests - fastp \ - --in1 ${fastq1} \ - --in2 ${fastq2} \ - --out1 ${fastq1.baseName}.trimmed.fq.gz \ - --out2 ${fastq2.baseName}.trimmed.fq.gz \ - --json ${name}.fastp_stats.json \ - --html ${name}.fastp_stats.html - """ +if (params.fastq1) { + if (library == "paired") { + + process readTrimmingPairedEnd { + cpus params.cpus + memory params.memory + tag params.name + publishDir "${params.output}/${params.name}", mode: "copy", pattern: "*fastp_stats*" + + input: + val name from params.name + file fastq1 from file(params.fastq1) + file fastq2 from file(params.fastq2) + + output: + set name, file("${fastq1.baseName}.trimmed.fq.gz"), 
file("${fastq2.baseName}.trimmed.fq.gz") into trimmed_fastqs + file("${name}.fastp_stats.json") + file("${name}.fastp_stats.html") + + """ + # --input_files needs to be forced, otherwise it is inherited from profile in tests + fastp \ + --in1 ${fastq1} \ + --in2 ${fastq2} \ + --out1 ${fastq1.baseName}.trimmed.fq.gz \ + --out2 ${fastq2.baseName}.trimmed.fq.gz \ + --json ${name}.fastp_stats.json \ + --html ${name}.fastp_stats.html + """ + } + + process alignmentPairedEnd { + cpus params.cpus + memory params.memory + tag params.name + + input: + set name, file(fastq1), file(fastq2) from trimmed_fastqs + + output: + set name, file("${name}.bam") into bam_files + + """ + # --input_files needs to be forced, otherwise it is inherited from profile in tests + nextflow run ${params.tronflow_bwa} \ + --input_name ${name} \ + --input_fastq1 ${fastq1} \ + --input_fastq2 ${fastq2} \ + --input_files false \ + --algorithm mem \ + --library ${library} \ + --output . \ + --reference ${reference} \ + --cpus ${task.cpus} --memory ${task.memory} \ + -profile ${workflow.profile} \ + -work-dir ${workflow.workDir} + """ + } + } + else { + + process readTrimmingSingleEnd { + cpus params.cpus + memory params.memory + tag params.name + publishDir "${params.output}/${params.name}", mode: "copy", pattern: "*fastp_stats*" + + input: + val name from params.name + file fastq1 from file(params.fastq1) + + output: + set name, file("${fastq1.baseName}.trimmed.fq.gz") into trimmed_fastqs + file("${name}.fastp_stats.json") + file("${name}.fastp_stats.html") + + """ + # --input_files needs to be forced, otherwise it is inherited from profile in tests + fastp \ + --in1 ${fastq1} \ + --out1 ${fastq1.baseName}.trimmed.fq.gz \ + --json ${name}.fastp_stats.json \ + --html ${name}.fastp_stats.html + """ + } + + process alignmentSingleEnd { + cpus params.cpus + memory params.memory + tag params.name + + input: + set name, file(fastq1) from trimmed_fastqs + + output: + set name, file("${name}.bam") into 
bam_files + + """ + # --input_files needs to be forced, otherwise it is inherited from profile in tests + nextflow run ${params.tronflow_bwa} \ + --input_name ${name} \ + --input_fastq1 ${fastq1} \ + --input_files false \ + --algorithm mem \ + --library ${library} \ + --output . \ + --reference ${reference} \ + --cpus ${task.cpus} --memory ${task.memory} \ + -profile ${workflow.profile} \ + -work-dir ${workflow.workDir} + """ + } } - process alignmentPairedEnd { + process bamPreprocessing { cpus params.cpus memory params.memory tag params.name + if (params.keep_intermediate) { + publishDir "${params.output}/${params.name}", mode: "copy" + } input: - set name, file(fastq1), file(fastq2) from trimmed_fastqs + set name, file(bam) from bam_files output: - set name, file("${name}.bam") into bam_files + set name, file("${name}.preprocessed.bam"), file("${name}.preprocessed.bai") into preprocessed_bams, + preprocessed_bams2, preprocessed_bams3, preprocessed_bams4 + """ - # --input_files needs to be forced, otherwise it is inherited from profile in tests - nextflow run ${params.tronflow_bwa} \ - --input_name ${name} \ - --input_fastq1 ${fastq1} \ - --input_fastq2 ${fastq2} \ + # --input_files, --known_indels1 and --known_indels2 needs to be forced, otherwise it is inherited from test profile + nextflow run ${params.tronflow_bam_preprocessing} \ + --input_bam ${bam} \ --input_files false \ - --algorithm mem \ - --library ${library} \ --output . 
\ --reference ${reference} \ - --cpus ${task.cpus} --memory ${task.memory} \ + --skip_bqsr --skip_metrics \ + --known_indels1 false --known_indels2 false \ + --prepare_bam_cpus ${params.cpus} --prepare_bam_memory ${params.memory} \ + --mark_duplicates_cpus ${params.cpus} --mark_duplicates_memory ${params.memory} \ -profile ${workflow.profile} \ -work-dir ${workflow.workDir} + + mv ${name}/${name}.preprocessed.bam ${name}.preprocessed.bam + mv ${name}/${name}.preprocessed.bai ${name}.preprocessed.bai """ } -} -else { - process readTrimmingSingleEnd { - cpus params.cpus - memory params.memory - tag params.name - publishDir "${params.output}/${params.name}", mode: "copy", pattern: "*fastp_stats*" + vcfs_to_normalize = null + + if (!params.skip_bcftools) { + process variantCallingBcfTools { + cpus params.cpus + memory params.memory + tag params.name + if (params.keep_intermediate) { + publishDir "${params.output}/${params.name}", mode: "copy" + } + + input: + set name, file(bam), file(bai) from preprocessed_bams + + output: + set name, file("${name}.bcftools.bcf") into bcftools_vcfs + + """ + bcftools mpileup \ + --redo-BAQ \ + --max-depth 0 \ + --min-BQ ${params.min_base_quality} \ + --min-MQ ${params.min_mapping_quality} \ + --count-orphans \ + --fasta-ref ${reference} \ + --annotate AD ${bam} | \ + bcftools call \ + --multiallelic-caller \ + --variants-only \ + --ploidy 1 | \ + bcftools filter \ + --exclude 'INFO/IMF < ${params.low_frequency_variant_threshold}' \ + --soft-filter LOW_FREQUENCY - | \ + bcftools filter \ + --exclude 'INFO/IMF >= ${params.low_frequency_variant_threshold} && INFO/IMF < ${params.subclonal_variant_threshold}' \ + --soft-filter SUBCLONAL \ + --output-type b - > ${name}.bcftools.bcf + """ + } + vcfs_to_normalize = vcfs_to_normalize == null? 
bcftools_vcfs : vcfs_to_normalize.concat(bcftools_vcfs) + } - input: - val name from params.name - file fastq1 from file(params.fastq1) + if (!params.skip_lofreq) { + process variantCallingLofreq { + cpus params.cpus + memory params.memory + tag params.name + if (params.keep_intermediate) { + publishDir "${params.output}/${params.name}", mode: "copy" + } + + input: + set name, file(bam), file(bai) from preprocessed_bams2 + + output: + set name, file("${name}.lofreq.vcf") into lofreq_vcfs + + """ + lofreq call \ + --min-bq ${params.min_base_quality} \ + --min-alt-bq ${params.min_base_quality} \ + --min-mq ${params.min_mapping_quality} \ + --ref ${reference} \ + --call-indels \ + <( lofreq indelqual --dindel --ref ${reference} ${bam} ) | \ + bgzip -c > ${name}.lofreq.vcf.gz + + tabix -p vcf ${name}.lofreq.vcf.gz + + # annotates low frequency and subclonal variants + bcftools view -Ob ${name}.lofreq.vcf.gz | \ + bcftools filter \ + --exclude 'INFO/AF < ${params.low_frequency_variant_threshold}' \ + --soft-filter LOW_FREQUENCY - | \ + bcftools filter \ + --exclude 'INFO/AF >= ${params.low_frequency_variant_threshold} && INFO/AF < ${params.subclonal_variant_threshold}' \ + --soft-filter SUBCLONAL - > ${name}.lofreq.vcf + """ + } + vcfs_to_normalize = vcfs_to_normalize == null? 
lofreq_vcfs : vcfs_to_normalize.concat(lofreq_vcfs) + } - output: - set name, file("${fastq1.baseName}.trimmed.fq.gz") into trimmed_fastqs - file("${name}.fastp_stats.json") - file("${name}.fastp_stats.html") + if (!params.skip_gatk) { + process variantCallingGatk { + cpus params.cpus + memory params.memory + tag params.name + if (params.keep_intermediate) { + publishDir "${params.output}/${params.name}", mode: "copy" + } + + input: + set name, file(bam), file(bai) from preprocessed_bams3 + + output: + set name, file("${name}.gatk.vcf") into gatk_vcfs + + """ + gatk HaplotypeCaller \ + --input $bam \ + --output ${name}.gatk.vcf \ + --reference ${reference} \ + --ploidy 1 \ + --min-base-quality-score ${params.min_base_quality} \ + --minimum-mapping-quality ${params.min_mapping_quality} \ + --annotation AlleleFraction + """ + } + vcfs_to_normalize = vcfs_to_normalize == null? gatk_vcfs : vcfs_to_normalize.concat(gatk_vcfs) + } - """ - # --input_files needs to be forced, otherwise it is inherited from profile in tests - fastp \ - --in1 ${fastq1} \ - --out1 ${fastq1.baseName}.trimmed.fq.gz \ - --json ${name}.fastp_stats.json \ - --html ${name}.fastp_stats.html - """ + if (!params.skip_ivar) { + process variantCallingIvar { + cpus params.cpus + memory params.memory + tag params.name + publishDir "${params.output}/${params.name}", mode: "copy" + + input: + set name, file(bam), file(bai) from preprocessed_bams4 + + output: + file("${name}.ivar.tsv") + + """ + samtools mpileup \ + -aa \ + --count-orphans \ + --max-depth 0 \ + --redo-BAQ \ + --min-BQ ${params.min_base_quality} \ + --min-MQ ${params.min_mapping_quality} \ + ${bam} | \ + ivar variants \ + -p ${name}.ivar \ + -q ${params.min_base_quality} \ + -t 0.03 \ + -r ${reference} \ + -g ${gff} + """ + } } +} +else if (params.fasta) { - process alignmentSingleEnd { + process assemblyVariantCaller { cpus params.cpus memory params.memory tag params.name + if (params.keep_intermediate) { + publishDir 
"${params.output}/${params.name}", mode: "copy" + } input: - set name, file(fastq1) from trimmed_fastqs + val name from params.name + file fasta from file(params.fasta) output: - set name, file("${name}.bam") into bam_files + set name, file("${name}.assembly.vcf") into vcfs_to_normalize """ - # --input_files needs to be forced, otherwise it is inherited from profile in tests - nextflow run ${params.tronflow_bwa} \ - --input_name ${name} \ - --input_fastq1 ${fastq1} \ - --input_files false \ - --algorithm mem \ - --library ${library} \ - --output . \ + assembly_variant_caller.py \ + --fasta ${fasta} \ --reference ${reference} \ - --cpus ${task.cpus} --memory ${task.memory} \ - -profile ${workflow.profile} \ - -work-dir ${workflow.workDir} + --output-vcf ${name}.assembly.vcf """ } } -process bamPreprocessing { - cpus params.cpus - memory params.memory - tag params.name - if (params.keep_intermediate) { - publishDir "${params.output}/${params.name}", mode: "copy" - } - - input: - set name, file(bam) from bam_files - - output: - set name, file("${name}.preprocessed.bam"), file("${name}.preprocessed.bai") into preprocessed_bams, - preprocessed_bams2, preprocessed_bams3, preprocessed_bams4 - - - """ - # --input_files, --known_indels1 and --known_indels2 needs to be forced, otherwise it is inherited from test profile - nextflow run ${params.tronflow_bam_preprocessing} \ - --input_bam ${bam} \ - --input_files false \ - --output . 
\ - --reference ${reference} \ - --skip_bqsr --skip_metrics \ - --known_indels1 false --known_indels2 false \ - --prepare_bam_cpus ${params.cpus} --prepare_bam_memory ${params.memory} \ - --mark_duplicates_cpus ${params.cpus} --mark_duplicates_memory ${params.memory} \ - -profile ${workflow.profile} \ - -work-dir ${workflow.workDir} - - mv ${name}/${name}.preprocessed.bam ${name}.preprocessed.bam - mv ${name}/${name}.preprocessed.bai ${name}.preprocessed.bai - """ -} - -process variantCallingBcfTools { - cpus params.cpus - memory params.memory - tag params.name - if (params.keep_intermediate) { - publishDir "${params.output}/${params.name}", mode: "copy" - } - - input: - set name, file(bam), file(bai) from preprocessed_bams - - output: - set name, file("${name}.bcftools.bcf") into bcftools_vcfs - - """ - bcftools mpileup \ - --redo-BAQ \ - --max-depth 0 \ - --min-BQ ${params.min_base_quality} \ - --min-MQ ${params.min_mapping_quality} \ - --count-orphans \ - --fasta-ref ${reference} \ - --annotate AD ${bam} | \ - bcftools call \ - --multiallelic-caller \ - --variants-only \ - --ploidy 1 | \ - bcftools filter \ - --exclude 'INFO/IMF < ${params.low_frequency_variant_threshold}' \ - --soft-filter LOW_FREQUENCY - | \ - bcftools filter \ - --exclude 'INFO/IMF >= ${params.low_frequency_variant_threshold} && INFO/IMF < ${params.subclonal_variant_threshold}' \ - --soft-filter SUBCLONAL \ - --output-type b - > ${name}.bcftools.bcf - """ -} - -process variantCallingLofreq { - cpus params.cpus - memory params.memory - tag params.name - if (params.keep_intermediate) { - publishDir "${params.output}/${params.name}", mode: "copy" - } - - input: - set name, file(bam), file(bai) from preprocessed_bams2 - - output: - set name, file("${name}.lofreq.vcf") into lofreq_vcfs - - """ - lofreq call \ - --min-bq ${params.min_base_quality} \ - --min-alt-bq ${params.min_base_quality} \ - --min-mq ${params.min_mapping_quality} \ - --ref ${reference} \ - --call-indels \ - <( lofreq indelqual 
--dindel --ref ${reference} ${bam} ) | \ - bgzip -c > ${name}.lofreq.vcf.gz - - tabix -p vcf ${name}.lofreq.vcf.gz - - # annotates low frequency and subclonal variants - bcftools view -Ob ${name}.lofreq.vcf.gz | \ - bcftools filter \ - --exclude 'INFO/AF < ${params.low_frequency_variant_threshold}' \ - --soft-filter LOW_FREQUENCY - | \ - bcftools filter \ - --exclude 'INFO/AF >= ${params.low_frequency_variant_threshold} && INFO/AF < ${params.subclonal_variant_threshold}' \ - --soft-filter SUBCLONAL - > ${name}.lofreq.vcf - """ -} - -process variantCallingGatk { - cpus params.cpus - memory params.memory - tag params.name - if (params.keep_intermediate) { - publishDir "${params.output}/${params.name}", mode: "copy" - } - - input: - set name, file(bam), file(bai) from preprocessed_bams3 - - output: - set name, file("${name}.gatk.vcf") into gatk_vcfs - - """ - gatk HaplotypeCaller \ - --input $bam \ - --output ${name}.gatk.vcf \ - --reference ${reference} \ - --ploidy 1 \ - --min-base-quality-score ${params.min_base_quality} \ - --minimum-mapping-quality ${params.min_mapping_quality} \ - --annotation AlleleFraction - """ -} - -process variantCallingIvar { - cpus params.cpus - memory params.memory - tag params.name - publishDir "${params.output}/${params.name}", mode: "copy" - - input: - set name, file(bam), file(bai) from preprocessed_bams4 - - output: - file("${name}.ivar.tsv") - - """ - samtools mpileup \ - -aa \ - --count-orphans \ - --max-depth 0 \ - --redo-BAQ \ - --min-BQ ${params.min_base_quality} \ - --min-MQ ${params.min_mapping_quality} \ - ${bam} | \ - ivar variants \ - -p ${name}.ivar \ - -q ${params.min_base_quality} \ - -t 0.03 \ - -r ${reference} \ - -g ${gff} - """ -} - process variantNormalization { cpus params.cpus memory params.memory @@ -343,7 +408,7 @@ process variantNormalization { } input: - set name, file(vcf) from bcftools_vcfs.concat(lofreq_vcfs).concat(gatk_vcfs) + set name, file(vcf) from vcfs_to_normalize output: set name, 
file("${vcf.baseName}.normalized.vcf") into normalized_vcf_files diff --git a/nextflow.config b/nextflow.config index f4dd4f5..3529bfe 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,9 +4,9 @@ * ------------------------------------------------- */ -params.tronflow_bwa = "tron-bioinformatics/tronflow-bwa -r v1.4.0" +params.tronflow_bwa = "tron-bioinformatics/tronflow-bwa -r v1.4.1" params.tronflow_bam_preprocessing = "tron-bioinformatics/tronflow-bam-preprocessing -r v1.5.0" -params.tronflow_variant_normalization = "tron-bioinformatics/tronflow-variant-normalization -r v1.1.0" +params.tronflow_variant_normalization = "tron-bioinformatics/tronflow-variant-normalization -r v1.1.1" params.reference = "$baseDir/reference/Sars_cov_2.ASM985889v3.dna.toplevel.fa" params.gff = "$baseDir/reference/Sars_cov_2.ASM985889v3.101.gff3" @@ -18,7 +18,6 @@ profiles { conda { process.conda = "$baseDir/environment.yml" } debug { process.beforeScript = 'echo $HOSTNAME' } test { - params.input_files = "$baseDir/test_data/input_data.txt" params.cpus = 1 params.memory = "2g" timeline.enabled = false @@ -36,7 +35,10 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -VERSION = '0.3.1' +cleanup = true +conda.createTimeout = '1 h' + +VERSION = '0.4.0' manifest { name = 'TRON-Bioinformatics/covigator-ngs-pipeline' @@ -48,9 +50,6 @@ manifest { version = VERSION } -cleanup = true -conda.createTimeout = '1 h' - params.help_message = """ Covigator NGS pipeline v${VERSION} @@ -58,10 +57,11 @@ Usage: nextflow run tron-bioinformatics/covigator-ngs-pipeline -profile conda --help Input: - * --fastq1: the first input FASTQ file + * --fastq1: the first input FASTQ file (not compatible with --fasta) + * --fasta: the FASTA file containing the assembly sequence (not compatible with --fastq1) * --name: the sample name, output files will be named after this name * --reference: the reference genome FASTA file, *.fai, *.dict and bwa 
indexes are required. - * --gff: the GFFv3 gene annotations file + * --gff: the GFFv3 gene annotations file (only optional with --fastq1) * --output: the folder where to publish output Optional input: @@ -72,8 +72,14 @@ Optional input: * --subclonal_variant_threshold: VAF superior threshold to mark a variant as subclonal (default: 0.8) * --memory: the ammount of memory used by each job (default: 3g) * --cpus: the number of CPUs used by each job (default: 1) + * --initialize: start the initialization of the conda environments + * -- skip_lofreq: skips calling variants with LoFreq + * -- skip_gatk: skips calling variants with GATK + * -- skip_bcftools: skips calling variants with BCFTools + * -- skip_ivar: skips calling variants with iVar Output: - * Output a normalized, phased and annotated VCF file for each of BCFtools, GATK and LoFreq + * Output a normalized, phased and annotated VCF file for each of BCFtools, GATK and LoFreq when FASTQ files are + provided or a single VCF obtained from a global alignment when a FASTA file is provided * Output a TSV file output from iVar """ diff --git a/test_data/hCoV-19_NTXX.fasta b/test_data/hCoV-19_NTXX.fasta new file mode 100644 index 0000000..b4dbdf0 --- /dev/null +++ b/test_data/hCoV-19_NTXX.fasta @@ -0,0 +1,375 @@ +>hCoV-19/Australia/NT12/2020|2020|2020-04-17 +ACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCA +CTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACG +GTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTG +TCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTT +GGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAA +AGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTA +TGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTG +GGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGC 
+CGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAAC +ATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGT +GGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACA +ACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTT +CTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCA +AATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGG +TAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATC +ATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAA +GAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGT +AGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTG +CCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATA +GGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAA +AGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAA +GTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTT +ACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATC +AGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGG +CCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCT +ACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGG +CACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACG +GTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATT +AAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGC +TAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAG +AAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTG +TTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATT 
+GGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCTTTGCACCTA +ATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATA +GAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTC +TGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAAC +CAGTATCTGAATTACTTACACCACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCT +GGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGA +AGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCA +CTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAA +GACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACTTACACC +AGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACA +TTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTT +GCAGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAA +AGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAG +GTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCT +GGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTT +TGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTG +AGATTCCTAAAGAGGAAGTTAAGCCAGTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAA +ATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGA +CATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCAT +ATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAA +ATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACAC +TGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGAGAAGC +AAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTC +TGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTA 
+TGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTC +TTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTG +CCAGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGA +AGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGTA +TAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATC +ACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAA +CCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTA +CTAAAATAAAACCTCATAATTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCT +TTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATA +CCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAAC +AAATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGT +GCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTTTCAACA +TGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTG +TAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAA +CAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCA +TGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTT +TGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAAC +AGTTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTTGGACAA +TTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACCAAACCAACCATATCCAAACGCAAGCT +TCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCT +TCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTC +TTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAAC +CAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAG +GACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACA 
+GAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAATAGTTTAA +AAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAAT +GAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGATACTAT +AGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGTGTTT +GTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAA +GCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTT +GAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCT +ACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTG +AACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCTTT +AGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTG +CAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTC +AGCTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTC +AGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTA +ATTCATCAACTTGTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATATACAACTATTGTTAATGGTGTTAGA +AGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGATACATT +CTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTG +ACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAG +ACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTAT +TAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTA +TGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTT +GATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGC +TGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAG +ATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAAT +AACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGCGTCA 
+TATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAAC +TACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTT +AATGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACT +TGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTG +AAATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACAT +GCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCAT +AACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGACTTTTTGCATTTCT +TACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCA +GCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGT +ACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTC +CTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGA +TCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCAGGAGTTTT +CTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAG +CATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTT +GGTGAATACAATCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTA +CTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCAC +ATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAG +CATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGC +TGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATA +ATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGT +TGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAACCTCTATCAC +CTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTT +GTGGTACAACTACACTTAACGGTCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGAC +ATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACT 
+CAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATA +AGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGT +GCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACATAGATTATGA +CTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTT +ATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTG +TACGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTAT +GAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTT +TAGATATGTGTGCTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAA +GATGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGG +TACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTT +TTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACAT +AAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAG +TTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGT +ATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTT +ATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAAT +CTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGT +ATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACT +TGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTACACA +GGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGT +TGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTC +TTACTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACAT +TCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTG +TAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTT +CCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAA 
+AAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGG +CTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACA +ATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCC +CTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGTGTG +ATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTT +AGTGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAA +ATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTG +ATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTG +AAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGA +CACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTA +GTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCT +TTTGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTT +GTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCAT +CGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAA +ATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGG +TTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGG +TGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATG +ATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATT +GATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGATTG +TCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTA +CTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTT +GTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACG +CGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTG +GTATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACG 
+CCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGC +AGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGT +TAAAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGC +ATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAAT +ATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAA +ACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAAT +CTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGG +TAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACT +TCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATC +AGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGT +CATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGA +GTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTAT +GCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCA +AAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACA +ACATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATG +CCTAATATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTA +TAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAA +CCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCA +CTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACACAGACTTTATGAGTGTCTCTATAG +AAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATACTCTCTG +ACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTT +TATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTG +CTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGG +CCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCT 
+TACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAAGCTACATGA +TGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGT +TTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTA +AGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATT +AGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTA +TGAGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAA +AATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACAT +TTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGT +CTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCA +CCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGA +AAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGA +CATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCA +ACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCA +GGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAG +CTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATA +CCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAA +TGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCA +ATGCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAG +GGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTG +TCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAAT +CAGCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATAGGC +GTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGT +AGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGACTATGTCATATTCACTC +AAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGC 
+ATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACA +AGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACC +TCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGA +CTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTAT +AAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTT +TACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTT +TCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACTTCCTTGGAA +TGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGG +CACATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGT +GCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTT +TATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATG +CACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGGACT +ATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGC +ATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAG +AATGGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACAT +TCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAG +ATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCC +ACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAG +TCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGG +TGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTA +GCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCT +TTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTACAC +AAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTA +AGCGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGG 
+GACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGA +AACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATG +GTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACA +TTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTA +CTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATG +AATTCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGT +GGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAG +TACAGTTAAAAACTATTTCATAACAGATGTGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTG +ATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAA +ATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACC +GGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTG +CAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCT +GTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACA +GTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTG +ATTGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACA +AAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGT +GGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTG +TTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATAGAT +GGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACAT +GAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTC +TTCTTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACG +AACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCC +CTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAG +GACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTT 
+TGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTT +TTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAA +TTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGT +TTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATT +TCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTA +GTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCA +AACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACANNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCA +GATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCA +TCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTC +ATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTAC +CAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTG +TATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACC +TTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACC +AACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAAT +TTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTT +TCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTG +ACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTAT +CAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGG +TTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCA +TTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATC +ATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTAC 
+TATTAGTGTTACCACAGAAATTCTACCCGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATT +CAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTT +GAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGG +TTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAG +TGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCA +CAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGC +GGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGT +TTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGC +AAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTT +AAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACA +AAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTA +ATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAG +AGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGA +CTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAA +GGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAA +CACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAG +ACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATT +AATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGA +TCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTG +CCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCC +TGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGAT +TTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGT +TCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTT +TTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAAC 
+TTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTA +TGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCA +AAAACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGT +GTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTA +TACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACT +CAACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAA +CATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGAC +GACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGACAGGTACGT +TAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGA +TTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCT +GAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTT +AGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTT +TCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATT +TTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGG +AATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTA +CGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCG +CTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTG +TGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGC +GTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGT +AGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAG +AGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAAT +TTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAA +AATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAG +TACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCA 
+CTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGT +TTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGT +TTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAG +CCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGT +CACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTT +ACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAG +TAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGAT +ATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTG +TTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTC +TGATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGA +ATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCT +CTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGA +CCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGT +ATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATAGGGGTTGCAACT +GAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCC +TCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCAC +GTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGT +GATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACA +AGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACA +ATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGA +ACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCAT +GGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATC +AAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAG +GCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGA 
+TTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGA +TGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATA +GCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGC +CACCACATTTTCACCGAGGCCACGCGGAGTACGATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAG +AGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATNNNNNNNNNNNNNNNNNNNNNAAAAAA +AAAAAAAAAAAAAAAAAAAAAA