From cb10e2ddafb649772d0bee1d6d822cfb98a8255a Mon Sep 17 00:00:00 2001 From: TomSmithCGAT Date: Mon, 11 Feb 2019 13:45:21 +0000 Subject: [PATCH 1/5] adds distance-based knee method and makes default --- tests/indrop_whitelist.tsv | 197 +++++- tests/indrop_whitelist_3_errors.tsv | 225 ++++++- tests/indrop_whitelist_density.tsv | 414 ++++++++++++ .../indrop_whitelist_ed_above_thres_corr.tsv | 161 ++++- .../indrop_whitelist_ed_above_thres_disc.tsv | 179 +++++- tests/indrop_whitelist_user.tsv | 123 +++- tests/indrop_whitelist_user_density.tsv | 607 ++++++++++++++++++ tests/scrb_whitelist.tsv | 286 ++++++++- tests/tests.yaml | 21 +- umi_tools/whitelist.py | 118 ++-- umi_tools/whitelist_methods.py | 181 +++++- 11 files changed, 2401 insertions(+), 111 deletions(-) create mode 100644 tests/indrop_whitelist_density.tsv create mode 100644 tests/indrop_whitelist_user_density.tsv diff --git a/tests/indrop_whitelist.tsv b/tests/indrop_whitelist.tsv index 1f0f7ca4..0013184d 100644 --- a/tests/indrop_whitelist.tsv +++ b/tests/indrop_whitelist.tsv @@ -1,19 +1,29 @@ +AAAACCTCCCGACTCCT AAAACCTCCCGACACCT 3 1 +AAAACCTCCTGTACACG 3 AAAACGCCTATGACTTT 5 AAAACTCGACATCGCAG 5 AAAAGTCGGTCCTTATT 6 AAAATCGTTGCTCTAGT AAAATCGTTACTCTAGT 6 1 +AAAATCGTTGGTGGGAT AAAATCGTTAGTGGGAT 4 1 AAAGGTAATACACTAAG 10 AAAGGTAATAGAGACTA 8 AAAGTAATCAGAACGGG 7 +AAAGTAATCCGGCACAT AAAGTAATCAGGCACAT 3 1 AAATCGGGTCACGGACT 7 AAATCGGGTGGCGTTAG 11 AAATGAATGTGTAGTTT AAATGAATGTGTGGTTT 9 1 +AAATGACCCGAGCGGTA AAATGACCCAAGCGGTA 4 1 +AAATGACCCTAACCATC 3 AACCCATATTAACCATC 15 +AACGAAACGATGGAAAT 3 +AACGAAACGGAGCCCAT 3 AACGAAACGGTTCAACT 15 AACTCACCGACGTATAC AACTCACCGGCGTATAC 6 1 AACTCACCGAGGAAGAC 9 AACTCACCGCCTATTCA 9 +AACTCACCGTGACCCTC 4 AACTGAGTTTGCCTCAC 7 +AACTTAGCGTCCAAAG 4 AAGACACCACTAGATTG 7 AAGATGGCTATGGGCAC 11 AAGATGGCTGATCTCGG 6 @@ -22,6 +32,8 @@ AAGATGGCTTGGCTACC AAGATGGCTAGGCTACC 11 1 AAGCGAAGTATTCAGTA 10 AAGCGAAGTCTCCGCAT 8 AAGCGAAGTTCACCGAG 5 +AAGCGCCTTGCTTTGGC AAGCGCCTTACTTTGGC 4 1 +AAGCGCCTTGGTGGGAT 3 
AAGCGCCTTTGTTTGAG 6 AAGCTACGGTCATATGG 10 AAGCTTCTCTTTCTTT 5 @@ -31,19 +43,30 @@ AATACTCTTAATTCCCA 13 AATACTCTTACACTAAG 10 AATACTCTTCACAAGGC AATACTCTTGACAAGGC 5 1 AATATACCTAGGCAGTT 8 +AATATCTTCATGACTTT 3 +AATATCTTCGGGAAATC AATATCTTCAGGAAATC 3 1 AATCCCACGATCAGCGC 11 AATCCCACGCACGGACT 9 +AATGCGGATTCCAGTCC AATGCGGATTCCAGTCA 3 1 AATGTTTGCCCTTCAG 6 AATGTTTGGGGAACCT AATGTTTGAGGAACCT 5 2 +ACAGATTACCGAACGTA 4 +ACAGATTACCTTACTCC 3 ACAGGTGTCAAATCAGA 9 +ACAGGTGTCCCCAAGCA 3 +ACCATTTGAGTTGTCAT 4 ACCATTTGATCCCAATC 7 ACCCACGAGGGTTGGT ACCCACGAAGGTTGGT 9 1 +ACCCACGATAACTACT 4 ACCCACGATTGAGGGT 6 ACCCGACTTGCAAGGAC ACCCGACTTACAAGGAC 9 1 ACCCGACTTGTTGTCAT ACCCGACTTATTGTCAT 6 1 +ACCCGACTTTCATATGG ACCCGACGTTCATATGG 4 1 ACCCGACTTTGCAAGGG 9 ACCCTAACCATGGATTA 10 ACCCTTGGTAAGCGTAC 5 +ACCCTTGGTATGTTGGC 4 +ACCGCAACTAGAGGTGG 4 ACCGTGTTTCTGTTCTT 5 ACCGTGTTTTGGTTTCT 10 ACCTTCTTACTTCAAT 6 @@ -52,62 +75,92 @@ ACCTTCTTTGCGTATC 11 ACCTTCTTTTGGATCG 12 ACGCTCTCAACTAGCCA 6 ACGTGCTAGTCGGTTTA 17 +ACTACTTGTTTCCGAGT 4 ACTAGATTGACTAGGAT 11 ACTAGATTGTCGGTACG 14 +ACTGCGTTGAAGCTTCT 3 ACTTAGGTAAGTGATGC 7 ACTTCAATCTTTAATC 5 ACTTCAATTGGGATTC 14 +AGAACGATTTGTAAAGG 3 AGAGACTAACTGCCGT 8 +AGAGACTACCGCTGTT 3 AGAGACTATGAAATGA AGAGACTATGTAATGA 11 1 AGAGACTATGACGGAC 10 AGAGGTGGCCCATCTG 9 AGAGGTGGCGACGTCA 5 AGATGTATTAAGCTTCT AGATGTATTACGCTTCT 7 1 AGATGTATTCCACATTA 8 +AGATGTATTCGATTGAT 3 AGATGTATTTGTACACG 10 AGCAACCTGCGGCTTAC 8 +AGCACCACCATCTCCC 4 AGCACCTCTTACCAGGC 6 +AGCCAAGATGTTGTCAT 4 AGCCAAGATTGAGGTCT 13 AGCCTCTTTAATTCCCA AGCCTCTTTAATTCCCC,AGCCTCTTTGATTCCCA 10 1,1 AGCCTCTTTCAGTCCCT 8 +AGCTTTCCATACAGCCG 4 AGGCAACGTCATATGG 13 AGGCAACGTTTAACAG 9 AGGCCTAAGGGTGGGAT 5 AGGGAACGACTTCGCAC 9 AGGGAACGATTGCATAT 9 AGGGAAGGTGGCGTTAG CGGGAAGGTGGCGTTAG 9 1 +AGGGAAGGTTACTTGTG 3 AGGGCCAATTCTCACTT 5 AGGGTGTATCAGTTTGC 8 +AGGGTGTATCTTGGTGT 4 AGGGTGTATGACAGATA CGGGTGTATGACAGATA 5 1 +AGGTGACACCGAGATGT 4 AGGTTAGTGAAGGTAAT 10 +AGGTTAGTGACCACGCT 3 AGGTTAGTGTCGAAGCT CGGTTAGTGTCGAAGCT 6 1 +AGTATGAGTAGAGGTGG 4 AGTATGAGTCCCATCTG 5 AGTATGAGTTGCCTCAC 
AGTATGAGTAGCCTCAC 7 1 AGTCAAAGACCTGACAC 7 AGTCAATACACTGCCGT CGTCAATACACTGCCGT 5 1 +AGTCAATACTTGCATAT 4 +AGTGGATGGGAGAGTAT 3 AGTGTCGGACGGGCTTT 6 AGTTTACGTACTAGGAT 5 +AGTTTACGTCCCTAACC 4 +AGTTTACGTTGACGGAC 3 +AGTTTACGTTTAAACTG 4 ATAGTCGCAATCAAGTG 6 ATAGTGGACCGAGATGT ATAGTGGACGGAGATGT 6 1 +ATATGCATGTACCTTG 4 ATCACGTTTAATGACCC ATCACGTTTACTGACCC 10 1 ATCACGTTTCCACATTA 8 ATCACGTTTCGAATAAT 5 ATCACGTTTTCATATGG 7 +ATCCGCTAGGATAAAG 3 ATCCGCTATGTAAAGG 6 ATCGGTTCGAACCTGAC 11 ATCGGTTCGCAAGTCAT 6 ATCGGTTCGGATTAGAC 10 +ATCTGCATCAGCAGAAC GTCTGCATCAGCAGAAC 4 1 +ATCTTTGTCTTCAGGT ATCTTTGTCTTCAGGA,CTCTTTGTCTTCAGGT 3 1,1 ATCTTTGTGTCCATGT ATCTTTGTATCCATGT 6 1 ATCTTTGTTAACCATC 7 ATGCTCCGTGTTACGAT 9 +ATGCTCCGTTTATCTGT 4 ATGGCCTGTAATGTTTG 9 ATGGCCTGTAGCACCAC 7 +ATGGCCTGTGTCCGTAC 3 +ATGGCCTGTGTTGTCAT 4 +ATGGCCTGTTGACGGAC 3 ATGTGTCCTATGGAAAT ATGTGTCCTGTGGAAAT 13 1 ATGTGTCCTGCTTTGGC ATATGTCCTGCTTTGGC 6 1 ATGTGTCCTTAACCATC 6 ATTCCAGACTTGATCTA 6 ATTCCAGACTTGTGACT 7 +ATTGATTCTATCAAGTG ATTGATTCTGTCAAGTG 3 1 +ATTGATTCTGTTAACCA ATTGATTCTATTAACCA,TTTGATTCTGTTAACCA 4 1,1 ATTGATTCTTGTAAAGG ATTGATTCTTGTAAAGA 6 1 +ATTGGATCGATGTCGGA 4 +ATTGGATCGGTGTCGGA 3 ATTGTGACTTCCGTCCA 6 ATTTCCGGCGAATAAT CTTTCCGGCGAATAAT 8 1 ATTTCCGGGTTACGAT ATTTCCGGATTACGAT 8 1 @@ -115,13 +168,19 @@ ATTTCCGGTACTTGTG 9 CAGTTTGCGGCGTTAG CAGTTTACGGCGTTAG 7 1 CATCGCAGCGACTCCT 5 CATCGCAGCTTCAGGT 6 +CATCGCAGTAGCCTCG 4 CATCGCAGTTATAGCC 14 +CATCGCAGTTTATCAC 4 CCAACCGTCCACATTA 7 CCAACCGTCGGGCTTT 9 CCAACCGTGTTCAACT 12 CCAACCGTTAGTCTAG 7 +CCCATCTGTCTGTGGT CCCATCTGTCCGTGGT,CCCATCTGTCTGTGAT 4 1,1 +CCCATCTGTTGGATCG 3 CCGCTGTTATCAGCGC 8 +CCGCTGTTTGCAAGGG CCGCTGTTCGCAAGGG,CCGCTGTTTGCAAAGG 4 1,1 CGAACGTAATACTCTT CAAACGTAATACTCTT 7 1 +CGAACGTACATTTGTT 4 CGAACGTAGGCGTTAG CGAACGTAGGAGTTAG 9 1 CGAACGTATCCCAATC 10 CGCTAATAGCATGGGT 5 @@ -130,10 +189,14 @@ CGGCTTACAGAGGTGG 6 CGTATTTCTTGCATAT CGTAATTCTTGCATAT 9 1 CTATAGAGCGAACGTA 5 CTATAGAGTGCCATCG 6 +CTATAGAGTTTGTGTC CTATAGAGTTTGTGTA 4 1 +CTTACGGGCCCATCTG 4 +CTTACGGGCGGCACAT 4 CTTACGGGTAGCTTAT 10 
CTTCAGGTGGATAAAG 9 CTTCAGGTTAGCTTAT 9 CTTCGATTCTCGCGTA 6 +CTTCGATTGAAGTGCC 4 CTTCGATTGCGTTGCT CTTCGATTTCGTTGCT 7 1 CTTCGATTTAAATAGG CTTCGATTTAAATAGT 11 1 CTTCGATTTGGGATTC 9 @@ -142,83 +205,128 @@ CTTTAATCCGCTCTCA 6 CTTTAATCGGGAGGTA CTTTAATCAGGAGGTA,CTTTAATCTGGAGGTA 7 2,1 CTTTCTTTATTCCTTG 5 CTTTCTTTGGAGAAGC CTTTCTTTAGAGAAGC 5 2 -GAAAGATTGTGCTTACCT GAAAGATTGTACTTACCT 8 3 +GAAAGATTGTACTTACCT 3 +GAAAGATTGTGCTTACCT 8 GAAAGCGTACGTTAACCA GAAAGCGTACATTAACCA 8 1 GAACACTAAGATGGAAAT 8 +GAACACTAAGCCCAAGCA 3 GAACACTAAGTGGTATGA 7 +GAACACTAAGTTGGACTT 3 GAACGCCATTATTTCCGG 8 GAACGCCATTCATCGCAG 11 GAACGCCATTTCTCAACC 13 +GAACTAGGATTGCAAGGG 4 GAACTGCCGTGTCAATAC GAACTGCCGTTTCAATAC 11 1 GAAGGAAGACCAGATTAC 6 GAAGGCAGTTATATGCAT 9 GAAGGCAGTTCAGTTTGC 10 GAAGTTTAGAAGGCAACG 11 +GAATCAAGTGGGATAAAG 4 GAATTCCTTGATTGGGCC 6 GAATTCCTTGTGACCCTC GAATTCCTTGAGACCCTC 7 1 +GACAATTAGTCTATAGAG GACAATTAGTCTATAAAG 3 1 GACACGGACTACCTTGCC 7 GACACGGACTCACGGACT GACACGGACTCGCGGACT 9 1 GACACGGACTCCGCAACT 6 +GACACGGACTCGGGCTTT 3 GACACGGACTTTAAGCGT 5 GACAGTCCCTTAACCATC 6 GACATTTGTTGAATACGC GACATTTGTTGAATACGA 13 1 GACATTTGTTTTTAACAG 7 +GACCACATTACAATTAGT 3 GACCACATTATGCAAGGG 9 +GACCCATAGCTGAGGTCT 3 GACCCGAATGAATGACCC 8 +GACCCGAATGGCATGGGT 3 GACCCGAATGTGACCAGT 7 +GACCCTTCAGATTCCTTG GACTCTTCAGATTCCTTG 4 1 GACCTACTAGCCTACTAG GACCTACTAGCCTACTAA 11 1 GACCTACTAGGGTGGGAT GACCTACTAGAGTGGGAT 11 1 GACCTACTAGTGCGTATC 6 +GACCTACTAGTTCACATA 4 +GACCTGACACTAACCATC 4 GACGAATAATAAGATTGT 5 +GACGACTCCTGGGAACCT 3 +GACGACTCCTGTTAACCA 4 GACGACTCCTTGACCCTC TACGACTCCTTGACCCTC 5 1 GACGACTCCTTGGTTTCT 5 GACGAGATGTCACGGACT 5 +GACGAGATGTGAGGCTGA GACGAGATGTAAGGCTGA 4 1 GACGAGATGTGGATAAAG 6 GACGAGATGTGTTTGTTT GACGCGATGTGTTTGTTT 15 1 GACGAGATGTTCGGTTTA 8 GACGAGATGTTGCGTATC 5 +GACGCTAGTCGGCATGCT GACGCTAGTCAGCATGCT 3 1 GACGCTAGTCTGAGGTCT 9 GACGTCAGCACGTACCTA 5 +GACTCTTGACGCTTTGGC GACTCTTGACACTTTGGC 3 1 GACTGTCTGGCTCTTGAC GACTGTCTGGCTCTTGAA 8 1 GACTGTCTGGTACAGCCG 6 +GACTGTCTGGTGTACACG GACTGTCTGGCGTACACG 3 1 +GACTTACTCCATTACGAT 3 GACTTACTCCTCCGTCCA 9 
+GACTTCTTCGAGGCAGTT 4 GACTTCTTCGATCCCACG 6 GAGAAGGCTTACATCTAT 7 GAGAAGGCTTGAATACGC GAGAAGGCTTAAATACGC 7 1 +GAGAAGGCTTTCCAGTCC 3 +GAGAAGGCTTTTAGTCCG 3 GAGAATACGCCGGCACAT 5 GAGAATTCGTATGGGCAC 6 +GAGAATTCGTGAGAATTG 4 GAGACAGATAAGTTTAGA 6 +GAGACAGATATACAGCCG GAGACAGATATACAGCCA 4 1 +GAGACGATGGCCTGTTAT 4 GAGAGAATTGATATGCAT 7 GAGAGAATTGATGGAAAT GAGAGAATTGATAGAAAT 6 1 GAGAGAATTGGATCGTTT 5 GAGAGAATTGGCATGGGT 8 GAGAGAATTGTACTTGTG 5 GAGAGAGTATATTCGACG 6 +GAGAGAGTATGCGTTGCT 3 +GAGAGCCCATGTAACGTT 4 GAGAGCCCATTTTGTGTC 8 GAGATCTCGGCAATTAGT 5 +GAGATCTCGGGCATGGGT 3 GAGATCTCGGTGTAGTTT GAGGTCTCGGTGTAGTTT 9 1 GAGATTGCGACTAGTAAC 5 GAGATTGCGAGTTACGAT GAGATTGCGAATTACGAT 5 1 GAGCGTTGCTATCAAGTG 8 +GAGCGTTGCTATCCGCTA 3 GAGCGTTGCTATTCCTTG 9 GAGGCGTTAGCTTCAGGT 5 +GAGGCGTTAGTTGATTCT 3 GAGGGAACCTTGCCATCG GAGGAAACCTTGCCATCG 6 1 +GAGGGAGGTACTAGTAAC 3 GAGGTCCCTTGAGTTTCG 8 +GAGGTCCCTTTACTTGTG 3 GAGTCCAAAGATGGATTA 5 GAGTCCAAAGCGAATAAT GAGTCCAAAGCGAATAAA,TAGTCCAAAGCGAATAAT 6 1,1 GAGTCCAAAGGGGAGGTA 5 GAGTCCAAAGTAGCCTCG 8 GAGTCCAAAGTGCCTCAC GAGTCCAAAGTGCCTGAC 6 1 +GAGTTAACCAGATCTCGG GAGTTAACCAAATCTCGG 3 1 GAGTTTCGCAAGAGGG 5 GAGTTTCGGTAATCCC 10 +GATAACCATCTCGAAGCT 3 GATAAGACGGCTTACTCC 7 GATAAGACGGTAACCCGT 7 GATACAAACTATGGAAAT 5 +GATACAAACTCACGGACT 3 +GATACAAACTGAACGATT 4 GATACAAACTGTAACGTT 7 GATAGTGTTTACACTAAG 5 +GATAGTGTTTTACCAGGC 4 +GATCCCAATCAATGTTTG 3 GATCCGTCCACGCTAATA GATCCGCCCACGCTAATA 5 1 GATCGGTACGCCAGCAGT GATCGGTACGCCAGCAGG 8 1 +GATCGGTACGTTGATCTA 4 GATCGGTTTATTCGGCCT GATCGATTTATTCGGCCT 7 1 +GATCGTTTGAGCGGTA GATCGTTTAAGCGGTA 4 1 +GATCGTTTGCAGCATG 4 +GATCGTTTTCATATGG 4 +GATCTCAACCACTGAGTT 3 +GATCTCAACCTAGCCTCG 3 GATCTCAACCTGGCCTGT GATCTCAACCTGACCTGT 6 1 GATCTGTGGTGGGTTGGT 5 GATCTGTGGTTTTGTGTC 7 @@ -227,21 +335,34 @@ GATGAGGTCTAGAAGTCC 6 GATGAGGTCTTGACCCTC 7 GATGAGGTCTTTGTTTAC 7 GATGAGGTCTTTTAACAG 6 +GATGCCATCGCTTCAGGT 3 +GATGGAGGAGGATGTGAG GATGGAGGAGAATGTGAG 3 1 GATGGGATTCCGGCACAT GATGGGATTCCGTCACAT 5 1 GATGGGATTCTCGGTTTA 7 +GATGGTTTCTGCAAGGAC 4 +GATGGTTTCTTAGAAATG 4 GATGGTTTCTTTGAGGGT 
GATGGTTTCTTTGAGGTT 5 1 GATGTAAAGGCCCTTCAG 7 +GATGTAAAGGGGATAAAG 3 +GATGTACACGAAAGCCTA 4 GATGTACACGCATCTCCC 7 GATGTAGTTTCAGATTAC 12 +GATGTAGTTTCTGTCTGG 4 GATGTAGTTTCTTACTCC 8 +GATGTGAGAGGAAGAC 3 GATGTGAGCGAATAAT 7 GATGTGAGGCAAAGCC GATGTGAGGCAAGGCC 11 1 +GATGTGAGTCGGTACG GATGTGAGTCGGTACT 3 1 +GATGTGAGTCGGTTTA GATGTGAGTCGATTTA,GATGTGAGTCGGTTTG 4 1,1 +GATGTTCCAGATTGGGCC 4 +GATTAGACGTAACGTT 3 GATTAGACTAACTACT 10 GATTAGACTGGCCTGT GATTAGACAGGCCTGT 6 1 GATTCCGAGTATGGGCAC 5 GATTCCGAGTCTTTCTTT 11 GATTGAGGGTAACCTGAC 12 GATTGAGGGTTAGCCTCG GATTGAGGGTTAGCTTCG 9 1 +GATTGATCTAAGTTTAGA 4 GATTGGCGTTGGTCCCTT 5 GATTGGCGTTTAGTCTAG 7 GATTTAACAGAACCTGAC GATTTAAAAGAACCTGAC 5 1 @@ -251,12 +372,19 @@ GATTTAACAGTCATATGG 6 GATTTAACAGTGCCTCAC GATTTAACAGTACCTCAC 5 1 GCAGCATGACACTAAG GCAGCATGACACTCAG 6 1 GCAGCATGATGGGCAC CCAGCATGATGGGCAC 7 1 +GCAGCATGGATCTCGG 3 +GCATGGGTTAACCCGT 4 GCTTACCTCTACTTGT 8 +GCTTTGGCACTAGCCA 4 +GGAAACAGACTAGGAT 3 +GGAAACAGTACCAGGC 3 GGAAACAGTCCCAATC 6 GGAGAAGCCCACATTA 5 GGATAAAGAGAGACTA 5 GGCATGCTTTCCGCTC 6 +GGGTTGGTGTTCAACT 4 GTCTCTCTAACTTAGC GTCTCTCTAACGTAGC 7 1 +GTCTCTCTATATGCAT 3 GTCTCTCTATTTCCGG GTCTCTCTATTTCGGG 11 1 GTCTCTCTCGAATAAT 8 GTCTCTCTGGTCTGAC 5 @@ -264,11 +392,13 @@ GTCTCTCTTGCGACAG 7 GTCTCTCTTTGTGACT 8 GTCTCTCTTTTATCAC 10 GTGTAACCTGCGACAG 5 +GTTACGATATGGAAAT 4 GTTCAACTCCGTGTTT GTTCAACTCCGTTTTT 6 1 GTTCAACTTACAGCCG 5 GTTCAACTTGACCAGT 7 GTTCAACTTGATGCCC 15 GTTGCACGCGATTGAT 5 +GTTGCACGGTCCGTAC 3 GTTGCACGTGTAGTTT GTTGCACGTGTAGTTG 7 1 GTTTGTTTCTCCGCAT 9 GTTTGTTTGGAAACAG 5 @@ -279,8 +409,13 @@ TACAGCCGCCCATCTG TACAGCCGCCCCTCTG 6 1 TACAGCCGGTTGCACG TACAGCCGGTTGCACA 11 1 TACAGCCGTCGGTACG 7 TACTTGTGTACAGCCG 8 +TACTTGTGTGACGGAC 4 TACTTGTGTGTTCCAG TACTTGTGTATTCCAG 10 1 +TAGCCTCGCCGCTGTT TAGCCTCGCCGCTTTT 3 1 +TAGCCTCGGCAGTCGA 3 +TAGCCTCGTCGACGGT 4 TAGCTTATCGGCACAT 7 +TAGCTTATTGTAAAGG 3 TAGTCTAGCGACGTCA TAGTCTAGCGACGTCC 6 1 TAGTCTAGTTATAGCC 5 TAGTCTAGTTGATCTA 7 @@ -290,16 +425,22 @@ TCATCCTTTCCAGTCC TCATCCTTTCTAGTCC 6 1 TCATCCTTTTCCGCTC TTATCCTTTTCCGCTC 8 1 
TCCTTATTCAGTCCCT 10 TCCTTATTTAGTCTAG 6 +TCGAAGCTGTTACGAT 4 TCGAAGCTTAACCATC TCGAAGCTAAACCATC 7 1 TCGAAGCTTGCCTCAC 8 TCGAAGCTTTCCGAGT 6 +TCGAAGCTTTCGGCCT TCGAATCTTTCGGCCT 3 1 TGAAAACTGGTAACTTAGC 8 +TGAAAACTGGTACTTCAAT 3 TGAAAACTGGTCCCATCTG TGAAAACTGGTCCCATCTA 8 1 TGAAAACTGGTCTTTAATC 6 +TGAAAATGTCGGATTGCGA TGAAAATGTCGAATTGCGA 4 2 TGAAACAATCCATATGCAT 12 TGAAACACACGCTTTCTTT TGAAACACACGCTTTCTTA 16 1 +TGAAACACACGTACCGGCA 3 TGAAACCCTTGAATGTTTG 6 TGAAACCCTTGCAATTAGT 9 +TGAAACCCTTGGGAGAAGC TGAAACCCTTGAGAGAAGC 4 1 TGAACATCTATGAGCCCAT TGAACATCTATAAGCCCAT,TGAACATCTATGAGCACAT 8 1,1 TGAACGCTCAGTGACCCTC TGAACGCTCAGTGACTCTC 12 1 TGAAGAAGTTAACTTCAAT 5 @@ -310,45 +451,66 @@ TGAAGGTTGTGAGGCAGTT 6 TGAAGGTTGTGGCAGTCGA 8 TGAAGGTTGTGGCTTTGGC TGAAGGTTGTGGCTTTTGC 5 1 TGAAGGTTGTGGGAAACAG TGAAGGTTGTGAGAAACAG 7 1 +TGAATACCCAGGGAATAGA 3 TGAATACCCAGTCGGTACG TGAATACCCAGTCGGTATG 11 1 TGAATGAAGCACATTTGTT 6 +TGAATGAAGCAGAAAGACC GGAATGAAGCAGAAAGACC 3 1 +TGAATGACTTTAACTACTC TGAATGACTTTAACTACTT 3 1 +TGAATGACTTTACCTTCTT 3 TGAATGACTTTGAATACGC 9 +TGAATGACTTTGTAACGTT 4 TGAATGACTTTTAACTACT 9 TGAATGCATGGCATCGCAG 5 TGAATGCATGGCATCTCCC 10 +TGAATGCATGGCGAATAAT 3 TGAATTCGACGGCGTTGCT 10 TGACAACAAATTAGTGTTT TGAAAACAAATTAGTGTTT 7 1 TGACAAGTCATATTCCTTG TGACAAGTCATTTTCCTTG 6 1 +TGACAAGTCATCTTCTTCG TGGCAAGTCATCTTCTTCG 4 1 TGACAAGTCATTTAAGCGT 7 TGACAATACTTATTCCTTG 16 TGACACAACAGGAGCGGTA 7 +TGACACAACAGGCCTCTTT 4 TGACACAACAGTGATGCCC 6 TGACACAGTTTATATCTTC 9 +TGACACAGTTTTGGTATGA TGACACAGTTTGGGTATGA 3 1 TGACACAGTTTTGTACACG 9 TGACACTTTCTAGCACCAC 5 TGACACTTTCTGACGATGG 9 +TGACACTTTCTTAACTACT 3 +TGACAGACCATCCTATTCA 4 TGACAGACCATGGTCCCTT TGACAGACCATAGTCCCTT 10 1 +TGACATACATTGAGTTTCG TGACATACATTAAGTTTCG 3 1 +TGACATACATTGGACTTCC TGACATACATTAGACTTCC 4 1 TGACATACATTTACAAACT 10 TGACCAGCAGTCCACATTA 6 TGACCAGTGTCAAAGA 7 +TGACCAGTTACTTGTG TGAGCAGTTACTTGTG 3 1 TGACCATTACTCTTACTCC 11 TGACCATTACTGTCCAAAG TGACCATTACTTTCCAAAG 11 1 +TGACCATTACTTGTAAAGG 3 TGACCTCGTCTAGGAAGAC 7 TGACCTCGTCTCGAGATGT 9 TGACCTCGTCTTGCTATTT 
TGACCTCATCTTGCTATTT,TGACCTCGTCTTCCTATTT,TGACCTCGTCTTGCTATTC 6 1,1,1 TGACCTCGTCTTTCACATA 11 TGACGTAGAAGCAACAAAT 9 TGACGTAGAAGTTATAGCC 9 +TGACGTATCGGGAAAGACC 3 TGACTACCGTTTTTGGGAG 6 TGACTACGAGCCCTACTAG 7 TGACTCAGAATCATCTCCC 10 TGACTCAGAATTTATAGCC 6 +TGACTGTTCTTACCTTGCC 4 TGACTGTTCTTACTTCAAT TGACTGTCCTTACTTCAAT 8 1 +TGACTTGGTGTAGAAGTCC 3 +TGACTTGGTGTTTAAGCGT 4 +TGAGAAGCACTCAGTCCCT TGAGAAGCATTCAGTCCCT 3 1 TGAGAAGCACTCCCATCTG TGAGAAGCACTCCCATGTG 11 1 TGAGAAGCACTGTTTGTTT 6 TGAGACAAAGGATTTCCGG 8 TGAGACAACTTCAGTTTGC 8 TGAGAGTTCACTCGACACC 13 +TGAGATTTACCCTAGTAAC TGAGATTTACCGTAGTAAC 3 1 TGAGCAGGGTATTTGTGTC TGAGTAGGGTATTTGTGTC 13 1 TGAGCCGGATTATCCCACG 8 TGAGCGCATTCCCTATTCA TGAGCGCATTCGCTATTCA 6 1 @@ -356,49 +518,74 @@ TGAGCGCATTCCGTCAGCA 8 TGAGCGCATTCTCGGTACG 11 TGAGGAATAGAGGCATGCT 6 TGAGGAATAGATAGCTTAT 7 +TGAGGGAAATCGAGCCCAT 4 +TGAGGTTTCTCAGAGACTA 4 TGAGGTTTCTCAGCACCAC TGAGGTTTCTCAGCACCAT 10 1 TGAGGTTTCTCGATTGCGA TGAGGTTTCTCAATTGCGA 5 2 +TGAGTAATCCCTAGCCTCG 4 TGAGTACACTCGAATACGC 6 +TGAGTCCATGTAAACGCCT 3 TGAGTCCATGTCAAGAGGG 11 +TGAGTCCATGTCGGCTTAC 4 TGAGTCCATGTGGCGGTTT TGAGTCCATGTGGTGGTTT 5 1 TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC 11 1 -TGAGTCCGTACGTTGTCAT TGAGTCCGTACATTGTCAT 5 3 +TGAGTCCGTACATTGTCAT 3 +TGAGTCCGTACGTTGTCAT 5 TGAGTCGTCGTGAGCCCAT 5 +TGAGTGGAGCTACCACGCT 4 TGAGTGGAGCTATGACCGA 9 TGAGTGGAGCTCCCAAGCA 7 TGAGTGGAGCTCTTCTTCG 7 +TGAGTGGAGCTGATTAGAC 3 TGAGTGGAGCTGCATGGGT TGAGTGGAGCTACATGGGT,TGAGTGGCGCTGCATGGGT 6 2,1 +TGAGTGGAGCTGTTTGTTT 4 TGAGTGGAGCTTCCCAATC 12 TGAGTGGAGCTTGCAAGGG 11 TGAGTGGAGCTTTTAACAG 5 TGAGTTCTGTTTCGACACC TGAGTTCTGTTTCGACACT 5 1 TGATAACCCGTATTCAGTA 11 TGATAACCCGTCCTATTCA 8 +TGATAACCCGTGTTAACCA 4 TGATAACCCGTTTCGGCCT 14 +TGATACGTGCTAATTCCCA 3 TGATACGTGCTAGTGAAAG 7 TGATACGTGCTCGAGATGT 5 TGATACGTGCTGTTCAACT 10 +TGATACGTGCTTTTACCCT 3 +TGATAGAAATGCCCGAATG 4 TGATAGAAATGGGTGGGAT TGATAGAAATGGGTGGAAT 6 1 TGATAGAAATGTGACCCTC TGATAGAAATGAGACCCTC,TGATAGAAATGTGACCCTT 10 2,1 TGATATTGCCTAAATCAGA 14 +TGATATTGCCTGGTGGGAT 3 TGATCACCGAGCGCTAATA 7 TGATCACCGAGTAGTCTAG 5 
TGATCCCGTAGCCGCTGTT TGATCCCGTAGGCGCTGTT 10 1 TGATCCCGTAGGCTCTAGT TGATCCCGTAGACTCTAGT 7 1 TGATCCCGTAGTTGCATAT TGATCCCGTCGTTGCATAT 13 1 +TGATCGACGGTCCCAAGCA 3 +TGATCGTCATGTACGTGCT 3 TGATCGTCATGTGCGACAG 6 +TGATGCCCATATGCAT 3 TGATGTCTTTCGGTGGGAT 7 TGATGTTTGAGACACTAAG TGATGTTTGAGGCACTAAG 9 1 +TGATTCGCTGGAAATCAGA 4 TGATTCGCTGGCGAATAAT 7 TGATTCGCTGGGCATGGGT TGATTCGCTGGACATGGGT 5 1 TGATTCGCTGGTTAAACTG 11 +TGATTCGCTGGTTGTGACT 3 +TGATTGCACGCTAAGACGG 3 TGATTGCACGCTGTGCTCA 8 TGATTGTCGCCCGGCACAT 6 TGATTTACCCTAGAGACTA TGATTTACCCTAGAAACTA 12 1 TGATTTACCCTGAATACGC 7 +TGCAAGGGATGGATTA 4 +TGCAAGGGGAATACGC 4 +TGCAAGGGTAAATAGG 3 TGCCTCACCTTCTGGA 5 TGCGACAGCTTTCTTT 5 TGCGACAGGGCGTTAG 6 +TGCGACAGGGCTACTA 4 +TGGCTACCGAGCCCAT 4 TGGCTACCTAGCTTAT 9 TGGCTACCTGCCATCG 7 TGGTATGAAGAGACTA 8 @@ -407,8 +594,14 @@ TTAAGCGTGAGCCCAT 6 TTATCTGTATGGGCAC 8 TTATCTGTCCCGAATG TTATCTGTCCCGAATA 6 1 TTATCTGTCGAATAAT 10 +TTATCTGTTGACGGAC 4 TTATCTGTTTATCTGT 7 +TTATCTGTTTGAGGGT 4 +TTATCTGTTTGATCTA TTATCTGTATGATCTA 3 1 TTGCATATAGGCAACG 5 TTGCATATCGAATAAT 10 +TTGGACTTAATGTTTG 4 +TTGGACTTAGGCAACG 3 TTGGACTTGAGTTTCG 15 TTTGTGTCCAGATTAC 8 +TTTGTGTCTGCAAGGG TTTGTGCCTGCAAGGG 4 1 diff --git a/tests/indrop_whitelist_3_errors.tsv b/tests/indrop_whitelist_3_errors.tsv index d50d42d3..8ea3001d 100644 --- a/tests/indrop_whitelist_3_errors.tsv +++ b/tests/indrop_whitelist_3_errors.tsv @@ -1,19 +1,29 @@ +AAAACCTCCCGACTCCT AAAACCTCCCGACACCT 3 1 +AAAACCTCCTGTACACG 3 AAAACGCCTATGACTTT 5 AAAACTCGACATCGCAG 5 AAAAGTCGGTCCTTATT 6 AAAATCGTTGCTCTAGT AAAATCGTTACTCTAGT 6 1 +AAAATCGTTGGTGGGAT AAAATCGTTAGTGGGAT 4 1 AAAGGTAATACACTAAG 10 AAAGGTAATAGAGACTA 8 AAAGTAATCAGAACGGG 7 +AAAGTAATCCGGCACAT AAAGTAATCAGGCACAT 3 1 AAATCGGGTCACGGACT 7 AAATCGGGTGGCGTTAG AATGCGGATGGCGTTAG 11 1 AAATGAATGTGTAGTTT AAATGAATGTGTGGTTT 9 1 +AAATGACCCGAGCGGTA AAATGACCCAAGCGGTA 4 1 +AAATGACCCTAACCATC 3 AACCCATATTAACCATC 15 +AACGAAACGATGGAAAT 3 +AACGAAACGGAGCCCAT 3 AACGAAACGGTTCAACT 15 AACTCACCGACGTATAC AACTCACCGACGTAACG,AACTCACCGGCGTATAC 6 1,1 AACTCACCGAGGAAGAC 9 
AACTCACCGCCTATTCA 9 +AACTCACCGTGACCCTC AACTCACCGTGACGGAC 4 1 AACTGAGTTTGCCTCAC 7 +AACTTAGCGTCCAAAG 4 AAGACACCACTAGATTG AAAACGCCTCTAGATTG 7 1 AAGATGGCTATGGGCAC 11 AAGATGGCTGATCTCGG 6 @@ -22,6 +32,8 @@ AAGATGGCTTGGCTACC AAGATGGCTAGGCTACC 11 1 AAGCGAAGTATTCAGTA 10 AAGCGAAGTCTCCGCAT 8 AAGCGAAGTTCACCGAG 5 +AAGCGCCTTGCTTTGGC AAGCGCCTTACTTTGGC 4 1 +AAGCGCCTTGGTGGGAT 3 AAGCGCCTTTGTTTGAG 6 AAGCTACGGTCATATGG 10 AAGCTTCTCTTTCTTT 5 @@ -31,83 +43,124 @@ AATACTCTTAATTCCCA 13 AATACTCTTACACTAAG 10 AATACTCTTCACAAGGC AATACTCTTGACAAGGC 5 1 AATATACCTAGGCAGTT 8 +AATATCTTCATGACTTT 3 +AATATCTTCGGGAAATC AATATCTTCAGGAAATC 3 1 AATCCCACGATCAGCGC 11 AATCCCACGCACGGACT 9 +AATGCGGATTCCAGTCC AATGCGGATTCCAGTCA,AATGCGGATTCCTCCCC 3 1,1 AATGTTTGCCCTTCAG 6 AATGTTTGGGGAACCT AATGTTTGAGGAACCT 5 2 +ACAGATTACCGAACGTA 4 +ACAGATTACCTTACTCC 3 ACAGGTGTCAAATCAGA 9 +ACAGGTGTCCCCAAGCA 3 +ACCATTTGAGTTGTCAT ACCCTTGGTGTTGTCAT 4 1 ACCATTTGATCCCAATC ACCCTTGGTTCCCAATC 7 1 ACCCACGAGGGTTGGT ACCCACGAAGGTTGGT 9 1 +ACCCACGATAACTACT 4 ACCCACGATTGAGGGT 6 ACCCGACTTGCAAGGAC ACCCGACTTACAAGGAC,ACCCGACTTGCAAGGGA 9 1,1 ACCCGACTTGTTGTCAT ACCCGACTTATTGTCAT 6 1 +ACCCGACTTTCATATGG ACCCGACGTTCATATGG 4 1 ACCCGACTTTGCAAGGG 9 ACCCTAACCATGGATTA ACCGCAACTATGGATTA 10 1 ACCCTTGGTAAGCGTAC 5 +ACCCTTGGTATGTTGGC 4 +ACCGCAACTAGAGGTGG 4 ACCGTGTTTCTGTTCTT 5 ACCGTGTTTTGGTTTCT ACCGTGTTTTAGTGTTT,ACCGTGTTTTGAGGTCT 10 1,1 ACCTTCTTACTTCAAT 6 ACCTTCTTCTTTCTTT TCATCCTTCTTTCTTT 10 1 ACCTTCTTTGCGTATC ACCTTCTTAGCGTACA 11 1 -ACCTTCTTTTGGATCG ACCTTCTTTTGGCTTG,CCCATCTGTTGGATCG 12 1,3 +ACCTTCTTTTGGATCG ACCTTCTTTTGGCTTG 12 1 ACGCTCTCAACTAGCCA ACGCTCTCAGTTAACCA 6 1 ACGTGCTAGTCGGTTTA 17 +ACTACTTGTTTCCGAGT 4 ACTAGATTGACTAGGAT 11 ACTAGATTGTCGGTACG ACTAGATTGTCGGTAGC 14 1 +ACTGCGTTGAAGCTTCT 3 ACTTAGGTAAGTGATGC 7 ACTTCAATCTTTAATC ACTTCAATCTTTTTTT 5 1 ACTTCAATTGGGATTC 14 +AGAACGATTTGTAAAGG 3 AGAGACTAACTGCCGT 8 +AGAGACTACCGCTGTT 3 AGAGACTATGAAATGA AGAGACTATGTAATGA 11 1 AGAGACTATGACGGAC 10 AGAGGTGGCCCATCTG 9 AGAGGTGGCGACGTCA 5 AGATGTATTAAGCTTCT AGATGTATTACGCTTCT 7 1 
AGATGTATTCCACATTA 8 +AGATGTATTCGATTGAT AGATGTATTCCCTTGGT 3 1 AGATGTATTTGTACACG 10 AGCAACCTGCGGCTTAC 8 +AGCACCACCATCTCCC 4 AGCACCTCTTACCAGGC 6 +AGCCAAGATGTTGTCAT AGCCAAGATGTTGTATC 4 1 AGCCAAGATTGAGGTCT 13 AGCCTCTTTAATTCCCA 10 AGCCTCTTTCAGTCCCT 8 +AGCTTTCCATACAGCCG 4 AGGCAACGTCATATGG 13 AGGCAACGTTTAACAG 9 AGGCCTAAGGGTGGGAT 5 AGGGAACGACTTCGCAC 9 AGGGAACGATTGCATAT 9 AGGGAAGGTGGCGTTAG CGGGAAGGTGGCGTTAG 9 1 +AGGGAAGGTTACTTGTG 3 AGGGCCAATTCTCACTT AGGGCCAATTCACGTTT 5 1 AGGGTGTATCAGTTTGC 8 +AGGGTGTATCTTGGTGT 4 AGGGTGTATGACAGATA AGGGCCAATGACAGATA,CGGGTGTATGACAGATA 5 1,1 +AGGTGACACCGAGATGT 4 AGGTTAGTGAAGGTAAT 10 -AGGTTAGTGTCGAAGCT AGGTTAGTGACCACGCT,CGGTTAGTGTCGAAGCT 6 3,1 +AGGTTAGTGACCACGCT 3 +AGGTTAGTGTCGAAGCT CGGTTAGTGTCGAAGCT 6 1 +AGTATGAGTAGAGGTGG 4 AGTATGAGTCCCATCTG AGTATGAGTCCCGACTT 5 1 AGTATGAGTTGCCTCAC AGTATGAGTAGCCTCAC 7 1 AGTCAAAGACCTGACAC AAGCAAAGTCCTGACAC 7 1 AGTCAATACACTGCCGT AGCCAAGATACTGCCGT,CGTCAATACACTGCCGT 5 1,1 +AGTCAATACTTGCATAT 4 +AGTGGATGGGAGAGTAT 3 AGTGTCGGACGGGCTTT 6 AGTTTACGTACTAGGAT 5 +AGTTTACGTCCCTAACC 4 +AGTTTACGTTGACGGAC AGTTTACGTTGACCCTC 3 1 +AGTTTACGTTTAAACTG 4 ATAGTCGCAATCAAGTG 6 ATAGTGGACCGAGATGT ATAGTGGACGGAGATGT 6 1 +ATATGCATGTACCTTG 4 ATCACGTTTAATGACCC ATCACGTTTACTGACCC,ATCACGTTTCCTGACAC 10 1,1 ATCACGTTTCCACATTA 8 ATCACGTTTCGAATAAT 5 ATCACGTTTTCATATGG ATCACGTTTTCATAGGT 7 1 +ATCCGCTAGGATAAAG TTCCGCTCGGATAAAG 3 1 ATCCGCTATGTAAAGG 6 ATCGGTTCGAACCTGAC 11 ATCGGTTCGCAAGTCAT 6 ATCGGTTCGGATTAGAC ATCGGTTCGGATTAACA,ATCGGTTCGTTTTAGAC 10 1,1 -ATCTTTGTGTCCATGT ATCTTTGTATCCATGT,ATCTTTGTCTTCAGGT 6 1,3 +ATCTGCATCAGCAGAAC GTCTGCATCAGCAGAAC 4 1 +ATCTTTGTCTTCAGGT ATCTTTGTCTTCAGGA,CTCTTTGTCTTCAGGT 3 1,1 +ATCTTTGTGTCCATGT 6 ATCTTTGTTAACCATC ATCTTTGTTGACCAGT 7 1 ATGCTCCGTGTTACGAT 9 +ATGCTCCGTTTATCTGT 4 ATGGCCTGTAATGTTTG ATGCTCCGTAATGTTTG 9 1 ATGGCCTGTAGCACCAC 7 +ATGGCCTGTGTCCGTAC ATGGCCTGTTTCCGCTC 3 1 +ATGGCCTGTGTTGTCAT 4 +ATGGCCTGTTGACGGAC 3 ATGTGTCCTATGGAAAT ATGTGTCCTGTGGAAAT 13 1 ATGTGTCCTGCTTTGGC ATATGTCCTGCTTTGGC 6 1 ATGTGTCCTTAACCATC 6 ATTCCAGACTTGATCTA 6 
ATTCCAGACTTGTGACT ATTCCAGACTTGAGGGT 7 1 +ATTGATTCTATCAAGTG ATTGATTCTGTCAAGTG 3 1 +ATTGATTCTGTTAACCA ATTGATTCTATTAACCA,TTTGATTCTGTTAACCA 4 1,1 ATTGATTCTTGTAAAGG ATTGATTCTTGTAAAGA 6 1 +ATTGGATCGATGTCGGA 4 +ATTGGATCGGTGTCGGA 3 ATTGTGACTTCCGTCCA 6 ATTTCCGGCGAATAAT ATTTCCGGCAATTAGT,CTTTCCGGCGAATAAT 8 2,1 ATTTCCGGGTTACGAT ATTTCCGGATTACGAT,CTTTCCGGATTACGAT 8 1,1 @@ -115,13 +168,19 @@ ATTTCCGGTACTTGTG 9 CAGTTTGCGGCGTTAG CAGTTTACGGCGTTAG 7 1 CATCGCAGCGACTCCT 5 CATCGCAGCTTCAGGT 6 +CATCGCAGTAGCCTCG 4 CATCGCAGTTATAGCC 14 +CATCGCAGTTTATCAC 4 CCAACCGTCCACATTA 7 CCAACCGTCGGGCTTT 9 CCAACCGTGTTCAACT 12 CCAACCGTTAGTCTAG 7 +CCCATCTGTCTGTGGT CCCATCTGTCCGTGGT,CCCATCTGTCTGTGAT 4 1,1 +CCCATCTGTTGGATCG 3 CCGCTGTTATCAGCGC 8 +CCGCTGTTTGCAAGGG CCGCTGTTCGCAAGGG,CCGCTGTTTGCAAAGG,CCGCTGTTTGCCATCG 4 1,1,1 CGAACGTAATACTCTT CAAACGTAATACTCTT 7 1 +CGAACGTACATTTGTT 4 CGAACGTAGGCGTTAG CGAACGTAGGAGTTAG 9 1 CGAACGTATCCCAATC CGAACGTATGCGTATC 10 1 CGCTAATAGCATGGGT 5 @@ -130,10 +189,14 @@ CGGCTTACAGAGGTGG 6 CGTATTTCTTGCATAT CGTAATTCTTGCATAT 9 1 CTATAGAGCGAACGTA 5 CTATAGAGTGCCATCG CTATAGAGTGCTATTT 6 1 +CTATAGAGTTTGTGTC CTATAGAGTTTGTGTA 4 1 +CTTACGGGCCCATCTG 4 +CTTACGGGCGGCACAT 4 CTTACGGGTAGCTTAT 10 CTTCAGGTGGATAAAG 9 CTTCAGGTTAGCTTAT 9 CTTCGATTCTCGCGTA 6 +CTTCGATTGAAGTGCC 4 CTTCGATTGCGTTGCT CTTCGATTGAGTTTCG,CTTCGATTTCGTTGCT 7 1,1 CTTCGATTTAAATAGG CTTCGATTTAAATAGT 11 1 CTTCGATTTGGGATTC 9 @@ -142,84 +205,129 @@ CTTTAATCCGCTCTCA 6 CTTTAATCGGGAGGTA CTTTAATCAGGAGGTA,CTTTAATCTGGAGGTA 7 2,1 CTTTCTTTATTCCTTG 5 CTTTCTTTGGAGAAGC CTTTCTTTAGAGAAGC 5 2 -GAAAGATTGTGCTTACCT GAAAGATTGTACTTACCT 8 3 +GAAAGATTGTACTTACCT 3 +GAAAGATTGTGCTTACCT 8 GAAAGCGTACGTTAACCA GAAAGCGTACATTAACCA 8 1 -GAACACTAAGATGGAAAT GAACACTAAGTTGGACTT 8 3 +GAACACTAAGATGGAAAT 8 +GAACACTAAGCCCAAGCA 3 GAACACTAAGTGGTATGA 7 +GAACACTAAGTTGGACTT 3 GAACGCCATTATTTCCGG 8 GAACGCCATTCATCGCAG 11 GAACGCCATTTCTCAACC 13 +GAACTAGGATTGCAAGGG 4 GAACTGCCGTGTCAATAC GAACTGCCGTTTCAATAC 11 1 GAAGGAAGACCAGATTAC 6 GAAGGCAGTTATATGCAT 9 GAAGGCAGTTCAGTTTGC 10 GAAGTTTAGAAGGCAACG 11 
+GAATCAAGTGGGATAAAG 4 GAATTCCTTGATTGGGCC 6 GAATTCCTTGTGACCCTC GAATTCCTTGAGACCCTC 7 1 +GACAATTAGTCTATAGAG GACAATTAGTCTATAAAG 3 1 GACACGGACTACCTTGCC 7 -GACACGGACTCACGGACT GACACGGACTCGCGGACT,GATACAAACTCACGGACT 9 1,3 +GACACGGACTCACGGACT GACACGGACTCGCGGACT 9 1 GACACGGACTCCGCAACT 6 +GACACGGACTCGGGCTTT 3 GACACGGACTTTAAGCGT 5 GACAGTCCCTTAACCATC 6 GACATTTGTTGAATACGC GACATTTGTTGAATACGA 13 1 GACATTTGTTTTTAACAG 7 +GACCACATTACAATTAGT 3 GACCACATTATGCAAGGG 9 +GACCCATAGCTGAGGTCT 3 GACCCGAATGAATGACCC 8 +GACCCGAATGGCATGGGT 3 GACCCGAATGTGACCAGT 7 +GACCCTTCAGATTCCTTG GACTCTTCAGATTCCTTG 4 1 GACCTACTAGCCTACTAG GACCTACTAGCCTACTAA 11 1 GACCTACTAGGGTGGGAT GACCTACTAGAGTGGGAT 11 1 GACCTACTAGTGCGTATC 6 +GACCTACTAGTTCACATA GACCTATTAGTTCACATC 4 1 +GACCTGACACTAACCATC 4 GACGAATAATAAGATTGT 5 +GACGACTCCTGGGAACCT 3 +GACGACTCCTGTTAACCA 4 GACGACTCCTTGACCCTC TACGACTCCTTGACCCTC 5 1 GACGACTCCTTGGTTTCT 5 GACGAGATGTCACGGACT 5 +GACGAGATGTGAGGCTGA GACGAGATGTAAGGCTGA 4 1 GACGAGATGTGGATAAAG 6 GACGAGATGTGTTTGTTT GACGCGATGTGTTTGTTT 15 1 GACGAGATGTTCGGTTTA 8 GACGAGATGTTGCGTATC GATGAGAAGTTGCGTAAC 5 1 +GACGCTAGTCGGCATGCT GACGCTAGTCAGCATGCT,GACGCTAGTCGTCATATT 3 1,1 GACGCTAGTCTGAGGTCT 9 GACGTCAGCACGTACCTA 5 +GACTCTTGACGCTTTGGC GACTCTTGACACTTTGGC 3 1 GACTGTCTGGCTCTTGAC GACTGTCTGGCTCTTGAA 8 1 GACTGTCTGGTACAGCCG 6 +GACTGTCTGGTGTACACG GACTGTCTGGCGTACACG 3 1 +GACTTACTCCATTACGAT 3 GACTTACTCCTCCGTCCA 9 +GACTTCTTCGAGGCAGTT GACTTCTTCGCCGCTGTT 4 1 GACTTCTTCGATCCCACG 6 GAGAAGGCTTACATCTAT 7 GAGAAGGCTTGAATACGC GAGAAGGCTTAAATACGC 7 1 +GAGAAGGCTTTCCAGTCC 3 +GAGAAGGCTTTTAGTCCG 3 GAGAATACGCCGGCACAT 5 GAGAATTCGTATGGGCAC 6 +GAGAATTCGTGAGAATTG 4 GAGACAGATAAGTTTAGA 6 +GAGACAGATATACAGCCG GAGACAGATATACAGCCA 4 1 +GAGACGATGGCCTGTTAT 4 GAGAGAATTGATATGCAT 7 GAGAGAATTGATGGAAAT 6 GAGAGAATTGGATCGTTT 5 GAGAGAATTGGCATGGGT 8 GAGAGAATTGTACTTGTG GAGAGAAATGTACCAGTG 5 1 GAGAGAGTATATTCGACG GAGAGAGTATATTCCTTG 6 1 +GAGAGAGTATGCGTTGCT 3 +GAGAGCCCATGTAACGTT 4 GAGAGCCCATTTTGTGTC 8 GAGATCTCGGCAATTAGT 5 +GAGATCTCGGGCATGGGT 3 GAGATCTCGGTGTAGTTT 
GAGATCTCGGTGTATTTG,GAGGTCTCGGTGTAGTTT 9 1,1 GAGATTGCGACTAGTAAC 5 GAGATTGCGAGTTACGAT GAGATCTCGGGTTACGAT,GAGATTGCGAATTACGAT 5 1,1 GAGCGTTGCTATCAAGTG 8 +GAGCGTTGCTATCCGCTA 3 GAGCGTTGCTATTCCTTG 9 GAGGCGTTAGCTTCAGGT 5 +GAGGCGTTAGTTGATTCT 3 GAGGGAACCTTGCCATCG GAGGAAACCTTGCCATCG 6 1 +GAGGGAGGTACTAGTAAC 3 GAGGTCCCTTGAGTTTCG 8 +GAGGTCCCTTTACTTGTG 3 GAGTCCAAAGATGGATTA 5 GAGTCCAAAGCGAATAAT GAGTCCAAAGCGAATAAA,TAGTCCAAAGCGAATAAT 6 1,1 GAGTCCAAAGGGGAGGTA 5 GAGTCCAAAGTAGCCTCG GAGTCCAAAGTAGTCGCA 8 1 GAGTCCAAAGTGCCTCAC GAGTCCAAAGAGCCTCGA,GAGTCCAAAGAGCCTCGG,GAGTCCAAAGTGCCTGAC 6 1,1,1 +GAGTTAACCAGATCTCGG GAGTTAACCAAATCTCGG 3 1 GAGTTTCGCAAGAGGG GAGTTTCGCGAGATGT 5 1 GAGTTTCGGTAATCCC 10 +GATAACCATCTCGAAGCT 3 GATAAGACGGCTTACTCC 7 GATAAGACGGTAACCCGT 7 GATACAAACTATGGAAAT GATACAAACTATGAAATG 5 1 +GATACAAACTCACGGACT 3 +GATACAAACTGAACGATT 4 GATACAAACTGTAACGTT GATACAAACTGTTCTGTT 7 1 GATAGTGTTTACACTAAG 5 +GATAGTGTTTTACCAGGC 4 +GATCCCAATCAATGTTTG 3 GATCCGTCCACGCTAATA GATCCGCCCACGCTAATA 5 1 GATCGGTACGCCAGCAGT GATCGGTACGCCAGCAGG 8 1 +GATCGGTACGTTGATCTA GATCGGTACGTTGATTAA 4 1 GATCGGTTTATTCGGCCT GATCGATTTATTCGGCCT 7 1 -GATCTCAACCTGGCCTGT GATCTCAACCTAGCCTCG,GATCTCAACCTGACCTGT 6 3,1 +GATCGTTTGAGCGGTA GATCGTTTAAGCGGTA 4 1 +GATCGTTTGCAGCATG 4 +GATCGTTTTCATATGG 4 +GATCTCAACCACTGAGTT 3 +GATCTCAACCTAGCCTCG 3 +GATCTCAACCTGGCCTGT GATCTCAACCTGACCTGT 6 1 GATCTGTGGTGGGTTGGT 5 GATCTGTGGTTTTGTGTC 7 GATGACCCTCGGCCCTTA 6 @@ -227,21 +335,34 @@ GATGAGGTCTAGAAGTCC 6 GATGAGGTCTTGACCCTC GATGAGGTCTTTCCGCTC 7 1 GATGAGGTCTTTGTTTAC GATGAGGTCTTTGATTCT 7 1 GATGAGGTCTTTTAACAG 6 +GATGCCATCGCTTCAGGT 3 +GATGGAGGAGGATGTGAG GATGGAGGAGAATGTGAG 3 1 GATGGGATTCCGGCACAT GATGGGATTCCGTCACAT 5 1 GATGGGATTCTCGGTTTA 7 +GATGGTTTCTGCAAGGAC GATGGTTTCTACTAGGAT 4 1 +GATGGTTTCTTAGAAATG 4 GATGGTTTCTTTGAGGGT GATGGTTTCTTCGAAGCT,GATGGTTTCTTTGAGGTT 5 2,1 GATGTAAAGGCCCTTCAG 7 +GATGTAAAGGGGATAAAG 3 +GATGTACACGAAAGCCTA 4 GATGTACACGCATCTCCC 7 GATGTAGTTTCAGATTAC 12 +GATGTAGTTTCTGTCTGG 4 GATGTAGTTTCTTACTCC 8 +GATGTGAGAGGAAGAC 3 GATGTGAGCGAATAAT 
GATGTGAGCGAATATG 7 1 GATGTGAGGCAAAGCC GATGTGAGGCAAGGCC 11 1 +GATGTGAGTCGGTACG 3 +GATGTGAGTCGGTTTA GATGTGAGTCGATTTA 4 1 +GATGTTCCAGATTGGGCC 4 +GATTAGACGTAACGTT 3 GATTAGACTAACTACT 10 GATTAGACTGGCCTGT GATTAGACAGGCCTGT 6 1 GATTCCGAGTATGGGCAC 5 GATTCCGAGTCTTTCTTT 11 GATTGAGGGTAACCTGAC 12 GATTGAGGGTTAGCCTCG GATTGAGGGTTAGCTTCG 9 1 +GATTGATCTAAGTTTAGA 4 GATTGGCGTTGGTCCCTT GATTGGCGTTTGACCCTC 5 1 GATTGGCGTTTAGTCTAG 7 GATTTAACAGAACCTGAC GATTTAAAAGAACCTGAC 5 1 @@ -251,12 +372,19 @@ GATTTAACAGTCATATGG 6 GATTTAACAGTGCCTCAC 5 GCAGCATGACACTAAG GCAGCATGACACTCAG 6 1 GCAGCATGATGGGCAC CCAGCATGATGGGCAC 7 1 +GCAGCATGGATCTCGG GTTGCACGGATCTCGG 3 1 +GCATGGGTTAACCCGT GCATGGGTCCAACCGT 4 1 GCTTACCTCTACTTGT GCTTACCTCTTCTTCG 8 1 -GGAAACAGTCCCAATC GGAAACAGTACCAGGC,TGCGACAGTCCCAATC 6 3,1 +GCTTTGGCACTAGCCA 4 +GGAAACAGACTAGGAT 3 +GGAAACAGTACCAGGC 3 +GGAAACAGTCCCAATC TGCGACAGTCCCAATC 6 1 GGAGAAGCCCACATTA 5 GGATAAAGAGAGACTA 5 GGCATGCTTTCCGCTC 6 +GGGTTGGTGTTCAACT GGGTTGGTGTTCAATC 4 1 GTCTCTCTAACTTAGC GTCTCTCTAACGTAGC 7 1 +GTCTCTCTATATGCAT 3 GTCTCTCTATTTCCGG GTCTCTCTAATTCCCA,GTCTCTCTATTTCGGG 11 1,1 GTCTCTCTCGAATAAT 8 GTCTCTCTGGTCTGAC GTCTCTCTGGACTTCC 5 1 @@ -264,11 +392,13 @@ GTCTCTCTTGCGACAG 7 GTCTCTCTTTGTGACT 8 GTCTCTCTTTTATCAC 10 GTGTAACCTGCGACAG 5 +GTTACGATATGGAAAT 4 GTTCAACTCCGTGTTT GTTCAACTCACAGTTT,GTTCAACTCCGTTTTT 6 1,1 GTTCAACTTACAGCCG GCTCTAGTTACAGCCG,GTTCAACTTACCGGCA 5 1,1 GTTCAACTTGACCAGT 7 GTTCAACTTGATGCCC 15 GTTGCACGCGATTGAT 5 +GTTGCACGGTCCGTAC 3 GTTGCACGTGTAGTTT GTTGCACGTGTAGTTG 7 1 GTTTGTTTCTCCGCAT GTTTGTTTTTCCGCTC 9 1 GTTTGTTTGGAAACAG 5 @@ -279,8 +409,13 @@ TACAGCCGCCCATCTG TACAGCCGCCCCTCTG,TACAGCCGCCCGACTT 6 1,1 TACAGCCGGTTGCACG TACAGCCGGCAGCATG,TACAGCCGGTTGCACA 11 1,1 TACAGCCGTCGGTACG 7 TACTTGTGTACAGCCG 8 +TACTTGTGTGACGGAC 4 TACTTGTGTGTTCCAG TACTTGTGTATTCCAG 10 1 +TAGCCTCGCCGCTGTT TAGCCTCGCCGCTTTT 3 1 +TAGCCTCGGCAGTCGA 3 +TAGCCTCGTCGACGGT TAGCCTCGTCGAGTGC 4 1 TAGCTTATCGGCACAT 7 +TAGCTTATTGTAAAGG 3 TAGTCTAGCGACGTCA TAGTCTAGCGACGTCC 6 1 TAGTCTAGTTATAGCC 5 TAGTCTAGTTGATCTA 
AGGCCTAGTTGATCTA 7 1 @@ -290,16 +425,22 @@ TCATCCTTTCCAGTCC TCATCCTTTCTAGTCC 6 1 TCATCCTTTTCCGCTC TCCTTATTTTCCGCTC,TTATCCTTTTCCGCTC 8 1,1 TCCTTATTCAGTCCCT 10 TCCTTATTTAGTCTAG 6 +TCGAAGCTGTTACGAT TCGAAGCTGGTGGGAT 4 2 TCGAAGCTTAACCATC TCGAAGCTAAACCATC 7 1 TCGAAGCTTGCCTCAC 8 -TCGAAGCTTTCCGAGT TCGAAGCTTTCGGCCT 6 3 +TCGAAGCTTTCCGAGT 6 +TCGAAGCTTTCGGCCT TCGAATCTTTCGGCCT 3 1 TGAAAACTGGTAACTTAGC 8 +TGAAAACTGGTACTTCAAT 3 TGAAAACTGGTCCCATCTG TGAAAACTGGTCCCATCTA 8 1 TGAAAACTGGTCTTTAATC 6 +TGAAAATGTCGGATTGCGA TGAAAATGTCGAATTGCGA 4 2 TGAAACAATCCATATGCAT 12 TGAAACACACGCTTTCTTT TGAAACACACGCCGTGTTT,TGAAACACACGCTTTAATC,TGAAACACACGCTTTCTTA 16 1,1,1 +TGAAACACACGTACCGGCA 3 TGAAACCCTTGAATGTTTG 6 TGAAACCCTTGCAATTAGT 9 +TGAAACCCTTGGGAGAAGC TGAAACCCTTGAGAGAAGC 4 1 TGAACATCTATGAGCCCAT TGAACATCTATAAGCCCAT,TGAACATCTATGAGCACAT 8 1,1 TGAACGCTCAGTGACCCTC TGAACGCTCAGTGACCTCA,TGAACGCTCAGTGACTCTC 12 1,1 TGAAGAAGTTAACTTCAAT 5 @@ -310,45 +451,66 @@ TGAAGGTTGTGAGGCAGTT 6 TGAAGGTTGTGGCAGTCGA 8 TGAAGGTTGTGGCTTTGGC TGAAGGTTGTGGCTTTTGC 5 1 TGAAGGTTGTGGGAAACAG TGAAGGTTGTGAGAAACAG 7 1 +TGAATACCCAGGGAATAGA 3 TGAATACCCAGTCGGTACG TGAATACCCAGTCGGTATG 11 1 TGAATGAAGCACATTTGTT TGAATGAAGCACATTTTTA 6 1 -TGAATGACTTTGAATACGC TGAATGACTTTAACTACTC 9 3 +TGAATGAAGCAGAAAGACC GGAATGAAGCAGAAAGACC 3 1 +TGAATGACTTTAACTACTC 3 +TGAATGACTTTACCTTCTT TGAATGACTTTTCAATCTT 3 1 +TGAATGACTTTGAATACGC 9 +TGAATGACTTTGTAACGTT 4 TGAATGACTTTTAACTACT 9 TGAATGCATGGCATCGCAG 5 TGAATGCATGGCATCTCCC 10 +TGAATGCATGGCGAATAAT 3 TGAATTCGACGGCGTTGCT 10 TGACAACAAATTAGTGTTT TGAAAACAAATTAGTGTTT 7 1 -TGACAAGTCATATTCCTTG TGACAAGTCATCTTCTTCG,TGACAAGTCATTTTCCTTG 6 4,1 +TGACAAGTCATATTCCTTG 6 +TGACAAGTCATCTTCTTCG TGACCAGTCCTCTTCTTCC,TGGCAAGTCATCTTCTTCG 4 1,1 TGACAAGTCATTTAAGCGT 7 TGACAATACTTATTCCTTG TGACAATACTTATTTCCGG 16 1 TGACACAACAGGAGCGGTA 7 +TGACACAACAGGCCTCTTT 4 TGACACAACAGTGATGCCC 6 TGACACAGTTTATATCTTC TGACACAGTTTCTTTCTTT 9 1 +TGACACAGTTTTGGTATGA TGACACAGTTTGGGTATGA,TGACACAGTTTTGCTATTT 3 1,1 TGACACAGTTTTGTACACG 9 TGACACTTTCTAGCACCAC 5 TGACACTTTCTGACGATGG 
9 +TGACACTTTCTTAACTACT 3 +TGACAGACCATCCTATTCA TGACAGACCATCCTACTAG 4 2 TGACAGACCATGGTCCCTT TGACAGACCATAGTCCCTT 10 1 +TGACATACATTGAGTTTCG TGACATACATTAAGTTTCG 3 1 +TGACATACATTGGACTTCC TGACAGACCATGGACTTCC,TGACATACATTAGACTTCC 4 1,1 TGACATACATTTACAAACT TGACATACATTAACAATCC 10 1 TGACCAGCAGTCCACATTA TGACCATTACTCCACATTA 6 1 TGACCAGTGTCAAAGA 7 +TGACCAGTTACTTGTG TGAGCAGTTACTTGTG 3 1 TGACCATTACTCTTACTCC 11 TGACCATTACTGTCCAAAG TGACCATTACTTTCCAAAG 11 1 +TGACCATTACTTGTAAAGG TGACCAGTACTTGTGT 3 1 TGACCTCGTCTAGGAAGAC 7 TGACCTCGTCTCGAGATGT 9 TGACCTCGTCTTGCTATTT TGACCTCATCTTGCTATTT,TGACCTCGTCTTCCTATTT,TGACCTCGTCTTGCTATTC 6 1,1,1 TGACCTCGTCTTTCACATA 11 TGACGTAGAAGCAACAAAT TGACGTAGAAGCGAATAAT 9 1 TGACGTAGAAGTTATAGCC TGACGTAGAAGTTATACCA 9 1 +TGACGTATCGGGAAAGACC TGACGTATCGGGAAACTCC 3 1 TGACTACCGTTTTTGGGAG 6 TGACTACGAGCCCTACTAG 7 TGACTCAGAATCATCTCCC 10 TGACTCAGAATTTATAGCC 6 +TGACTGTTCTTACCTTGCC TGACAATACTTACCTTGCC 4 1 TGACTGTTCTTACTTCAAT TGACTGTCCTTACTTCAAT 8 1 +TGACTTGGTGTAGAAGTCC 3 +TGACTTGGTGTTTAAGCGT 4 +TGAGAAGCACTCAGTCCCT TGAGAAGCATTCAGTCCCT 3 1 TGAGAAGCACTCCCATCTG TGAGAAGCACTCCCATGTG 11 1 TGAGAAGCACTGTTTGTTT 6 TGAGACAAAGGATTTCCGG TGACACAAGGCATTTCCGG 8 1 TGAGACAACTTCAGTTTGC TGAGACAACTTCGGCTTAC 8 2 TGAGAGTTCACTCGACACC 13 +TGAGATTTACCCTAGTAAC TGAGATTTACCGTAGTAAC 3 1 TGAGCAGGGTATTTGTGTC TGAGTAGGGTATTTGTGTC 13 1 TGAGCCGGATTATCCCACG 8 TGAGCGCATTCCCTATTCA TGAGCGCATTCGCTATTCA 6 1 @@ -356,50 +518,75 @@ TGAGCGCATTCCGTCAGCA 8 TGAGCGCATTCTCGGTACG 11 TGAGGAATAGAGGCATGCT 6 TGAGGAATAGATAGCTTAT TGAGGAATAGAGAGCCCAT 7 1 +TGAGGGAAATCGAGCCCAT 4 +TGAGGTTTCTCAGAGACTA 4 TGAGGTTTCTCAGCACCAC TGAGGTTTCTCAGCACCAT 10 1 TGAGGTTTCTCGATTGCGA TGAGGTTTCTCAATTGCGA 5 2 +TGAGTAATCCCTAGCCTCG TGAGTAATTCCTAGCCGCG 4 1 TGAGTACACTCGAATACGC 6 +TGAGTCCATGTAAACGCCT TGAGTCCATGTAAAAGACC 3 1 TGAGTCCATGTCAAGAGGG 11 +TGAGTCCATGTCGGCTTAC 4 TGAGTCCATGTGGCGGTTT TGAGTCCATGTGGTGGTTT 5 1 -TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC,TGAGTCCATGTTGGCTACC 11 1,1 -TGAGTCCGTACGTTGTCAT TGAGTCCGTACATTGTCAT 5 3 +TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC 
11 1 +TGAGTCCGTACATTGTCAT 3 +TGAGTCCGTACGTTGTCAT 5 TGAGTCGTCGTGAGCCCAT TGAGTCGTCGTCAGTCCCT 5 1 +TGAGTGGAGCTACCACGCT 4 TGAGTGGAGCTATGACCGA TGAGTGGAGCTATCAACGC 9 1 -TGAGTGGAGCTCCCAAGCA TGAGTGGAGCTACCACGCT 7 4 +TGAGTGGAGCTCCCAAGCA 7 TGAGTGGAGCTCTTCTTCG TGAGTGGAGCTTTTGTTTG 7 1 +TGAGTGGAGCTGATTAGAC 3 TGAGTGGAGCTGCATGGGT TGAGTGGAGCTACATGGGT,TGAGTGGCGCTGCATGGGT 6 2,1 +TGAGTGGAGCTGTTTGTTT 4 TGAGTGGAGCTTCCCAATC 12 TGAGTGGAGCTTGCAAGGG 11 TGAGTGGAGCTTTTAACAG 5 TGAGTTCTGTTTCGACACC TGAGTTCTGTTTCGACACT 5 1 TGATAACCCGTATTCAGTA 11 TGATAACCCGTCCTATTCA 8 +TGATAACCCGTGTTAACCA 4 TGATAACCCGTTTCGGCCT 14 +TGATACGTGCTAATTCCCA 3 TGATACGTGCTAGTGAAAG TGATACGTGCTATGGAAAT 7 1 TGATACGTGCTCGAGATGT TGATACGTGCTCGATGTCT 5 1 TGATACGTGCTGTTCAACT 10 +TGATACGTGCTTTTACCCT 3 +TGATAGAAATGCCCGAATG 4 TGATAGAAATGGGTGGGAT TGATAGAAATGGGTGGAAT 6 1 TGATAGAAATGTGACCCTC TGATAGAAATGAGACCCTC,TGATAGAAATGTGACCCTT 10 2,1 TGATATTGCCTAAATCAGA 14 +TGATATTGCCTGGTGGGAT 3 TGATCACCGAGCGCTAATA 7 TGATCACCGAGTAGTCTAG 5 TGATCCCGTAGCCGCTGTT 10 TGATCCCGTAGGCTCTAGT TGATCCCGTAGACTCTAGT 7 1 TGATCCCGTAGTTGCATAT TGATCCCGTCGTTGCATAT 13 1 +TGATCGACGGTCCCAAGCA 3 +TGATCGTCATGTACGTGCT 3 TGATCGTCATGTGCGACAG 6 +TGATGCCCATATGCAT 3 TGATGTCTTTCGGTGGGAT TGATGTCTTTCACTAGGAT 7 1 TGATGTTTGAGACACTAAG TGATGTTTGAGGCACTAAG 9 1 +TGATTCGCTGGAAATCAGA 4 TGATTCGCTGGCGAATAAT TGATTCGCTGGCGAATATG 7 1 TGATTCGCTGGGCATGGGT TGATTCGCTGGACATGGGT 5 1 TGATTCGCTGGTTAAACTG 11 +TGATTCGCTGGTTGTGACT 3 +TGATTGCACGCTAAGACGG 3 TGATTGCACGCTGTGCTCA 8 TGATTGTCGCCCGGCACAT TGATCGACACCCGGCACAT 6 1 TGATTTACCCTAGAGACTA TGATTTACCCTAGAAACTA 12 1 TGATTTACCCTGAATACGC 7 +TGCAAGGGATGGATTA 4 +TGCAAGGGGAATACGC 4 +TGCAAGGGTAAATAGG TGCAAGGGTAGAAATG 3 1 TGCCTCACCTTCTGGA 5 TGCGACAGCTTTCTTT 5 TGCGACAGGGCGTTAG 6 -TGGCTACCTAGCTTAT TGGCTACCGAGCCCAT,TGGCTACCTGGTTTCT 9 4,2 +TGCGACAGGGCTACTA 4 +TGGCTACCGAGCCCAT 4 +TGGCTACCTAGCTTAT TGGCTACCTGGTTTCT 9 2 TGGCTACCTGCCATCG 7 TGGTATGAAGAGACTA 8 TTAAGCGTGACAAAGG TTAAGCGTTACAAACT 8 1 @@ -407,8 +594,14 @@ TTAAGCGTGAGCCCAT 6 TTATCTGTATGGGCAC 8 TTATCTGTCCCGAATG 
TTATCTGTCCCGAATA 6 1 TTATCTGTCGAATAAT 10 +TTATCTGTTGACGGAC 4 TTATCTGTTTATCTGT 7 -TTGCATATAGGCAACG TTGGACTTAGGCAACG 5 3 +TTATCTGTTTGAGGGT 4 +TTATCTGTTTGATCTA TTATCTGTATGATCTA 3 1 +TTGCATATAGGCAACG 5 TTGCATATCGAATAAT 10 +TTGGACTTAATGTTTG 4 +TTGGACTTAGGCAACG 3 TTGGACTTGAGTTTCG 15 TTTGTGTCCAGATTAC 8 +TTTGTGTCTGCAAGGG TTTGTGCCTGCAAGGG 4 1 diff --git a/tests/indrop_whitelist_density.tsv b/tests/indrop_whitelist_density.tsv new file mode 100644 index 00000000..1f0f7ca4 --- /dev/null +++ b/tests/indrop_whitelist_density.tsv @@ -0,0 +1,414 @@ +AAAACGCCTATGACTTT 5 +AAAACTCGACATCGCAG 5 +AAAAGTCGGTCCTTATT 6 +AAAATCGTTGCTCTAGT AAAATCGTTACTCTAGT 6 1 +AAAGGTAATACACTAAG 10 +AAAGGTAATAGAGACTA 8 +AAAGTAATCAGAACGGG 7 +AAATCGGGTCACGGACT 7 +AAATCGGGTGGCGTTAG 11 +AAATGAATGTGTAGTTT AAATGAATGTGTGGTTT 9 1 +AACCCATATTAACCATC 15 +AACGAAACGGTTCAACT 15 +AACTCACCGACGTATAC AACTCACCGGCGTATAC 6 1 +AACTCACCGAGGAAGAC 9 +AACTCACCGCCTATTCA 9 +AACTGAGTTTGCCTCAC 7 +AAGACACCACTAGATTG 7 +AAGATGGCTATGGGCAC 11 +AAGATGGCTGATCTCGG 6 +AAGATGGCTTGATGCCC 7 +AAGATGGCTTGGCTACC AAGATGGCTAGGCTACC 11 1 +AAGCGAAGTATTCAGTA 10 +AAGCGAAGTCTCCGCAT 8 +AAGCGAAGTTCACCGAG 5 +AAGCGCCTTTGTTTGAG 6 +AAGCTACGGTCATATGG 10 +AAGCTTCTCTTTCTTT 5 +AAGCTTCTGCTTACCT 7 +AAGTGAAAGATATGCAT 5 +AATACTCTTAATTCCCA 13 +AATACTCTTACACTAAG 10 +AATACTCTTCACAAGGC AATACTCTTGACAAGGC 5 1 +AATATACCTAGGCAGTT 8 +AATCCCACGATCAGCGC 11 +AATCCCACGCACGGACT 9 +AATGTTTGCCCTTCAG 6 +AATGTTTGGGGAACCT AATGTTTGAGGAACCT 5 2 +ACAGGTGTCAAATCAGA 9 +ACCATTTGATCCCAATC 7 +ACCCACGAGGGTTGGT ACCCACGAAGGTTGGT 9 1 +ACCCACGATTGAGGGT 6 +ACCCGACTTGCAAGGAC ACCCGACTTACAAGGAC 9 1 +ACCCGACTTGTTGTCAT ACCCGACTTATTGTCAT 6 1 +ACCCGACTTTGCAAGGG 9 +ACCCTAACCATGGATTA 10 +ACCCTTGGTAAGCGTAC 5 +ACCGTGTTTCTGTTCTT 5 +ACCGTGTTTTGGTTTCT 10 +ACCTTCTTACTTCAAT 6 +ACCTTCTTCTTTCTTT 10 +ACCTTCTTTGCGTATC 11 +ACCTTCTTTTGGATCG 12 +ACGCTCTCAACTAGCCA 6 +ACGTGCTAGTCGGTTTA 17 +ACTAGATTGACTAGGAT 11 +ACTAGATTGTCGGTACG 14 +ACTTAGGTAAGTGATGC 7 +ACTTCAATCTTTAATC 5 +ACTTCAATTGGGATTC 14 +AGAGACTAACTGCCGT 8 
+AGAGACTATGAAATGA AGAGACTATGTAATGA 11 1 +AGAGACTATGACGGAC 10 +AGAGGTGGCCCATCTG 9 +AGAGGTGGCGACGTCA 5 +AGATGTATTAAGCTTCT AGATGTATTACGCTTCT 7 1 +AGATGTATTCCACATTA 8 +AGATGTATTTGTACACG 10 +AGCAACCTGCGGCTTAC 8 +AGCACCTCTTACCAGGC 6 +AGCCAAGATTGAGGTCT 13 +AGCCTCTTTAATTCCCA AGCCTCTTTAATTCCCC,AGCCTCTTTGATTCCCA 10 1,1 +AGCCTCTTTCAGTCCCT 8 +AGGCAACGTCATATGG 13 +AGGCAACGTTTAACAG 9 +AGGCCTAAGGGTGGGAT 5 +AGGGAACGACTTCGCAC 9 +AGGGAACGATTGCATAT 9 +AGGGAAGGTGGCGTTAG CGGGAAGGTGGCGTTAG 9 1 +AGGGCCAATTCTCACTT 5 +AGGGTGTATCAGTTTGC 8 +AGGGTGTATGACAGATA CGGGTGTATGACAGATA 5 1 +AGGTTAGTGAAGGTAAT 10 +AGGTTAGTGTCGAAGCT CGGTTAGTGTCGAAGCT 6 1 +AGTATGAGTCCCATCTG 5 +AGTATGAGTTGCCTCAC AGTATGAGTAGCCTCAC 7 1 +AGTCAAAGACCTGACAC 7 +AGTCAATACACTGCCGT CGTCAATACACTGCCGT 5 1 +AGTGTCGGACGGGCTTT 6 +AGTTTACGTACTAGGAT 5 +ATAGTCGCAATCAAGTG 6 +ATAGTGGACCGAGATGT ATAGTGGACGGAGATGT 6 1 +ATCACGTTTAATGACCC ATCACGTTTACTGACCC 10 1 +ATCACGTTTCCACATTA 8 +ATCACGTTTCGAATAAT 5 +ATCACGTTTTCATATGG 7 +ATCCGCTATGTAAAGG 6 +ATCGGTTCGAACCTGAC 11 +ATCGGTTCGCAAGTCAT 6 +ATCGGTTCGGATTAGAC 10 +ATCTTTGTGTCCATGT ATCTTTGTATCCATGT 6 1 +ATCTTTGTTAACCATC 7 +ATGCTCCGTGTTACGAT 9 +ATGGCCTGTAATGTTTG 9 +ATGGCCTGTAGCACCAC 7 +ATGTGTCCTATGGAAAT ATGTGTCCTGTGGAAAT 13 1 +ATGTGTCCTGCTTTGGC ATATGTCCTGCTTTGGC 6 1 +ATGTGTCCTTAACCATC 6 +ATTCCAGACTTGATCTA 6 +ATTCCAGACTTGTGACT 7 +ATTGATTCTTGTAAAGG ATTGATTCTTGTAAAGA 6 1 +ATTGTGACTTCCGTCCA 6 +ATTTCCGGCGAATAAT CTTTCCGGCGAATAAT 8 1 +ATTTCCGGGTTACGAT ATTTCCGGATTACGAT 8 1 +ATTTCCGGTACTTGTG 9 +CAGTTTGCGGCGTTAG CAGTTTACGGCGTTAG 7 1 +CATCGCAGCGACTCCT 5 +CATCGCAGCTTCAGGT 6 +CATCGCAGTTATAGCC 14 +CCAACCGTCCACATTA 7 +CCAACCGTCGGGCTTT 9 +CCAACCGTGTTCAACT 12 +CCAACCGTTAGTCTAG 7 +CCGCTGTTATCAGCGC 8 +CGAACGTAATACTCTT CAAACGTAATACTCTT 7 1 +CGAACGTAGGCGTTAG CGAACGTAGGAGTTAG 9 1 +CGAACGTATCCCAATC 10 +CGCTAATAGCATGGGT 5 +CGGCACATTTTGTGTC 5 +CGGCTTACAGAGGTGG 6 +CGTATTTCTTGCATAT CGTAATTCTTGCATAT 9 1 +CTATAGAGCGAACGTA 5 +CTATAGAGTGCCATCG 6 +CTTACGGGTAGCTTAT 10 +CTTCAGGTGGATAAAG 9 +CTTCAGGTTAGCTTAT 9 +CTTCGATTCTCGCGTA 6 
+CTTCGATTGCGTTGCT CTTCGATTTCGTTGCT 7 1 +CTTCGATTTAAATAGG CTTCGATTTAAATAGT 11 1 +CTTCGATTTGGGATTC 9 +CTTCGATTTTGAGGGT 7 +CTTTAATCCGCTCTCA 6 +CTTTAATCGGGAGGTA CTTTAATCAGGAGGTA,CTTTAATCTGGAGGTA 7 2,1 +CTTTCTTTATTCCTTG 5 +CTTTCTTTGGAGAAGC CTTTCTTTAGAGAAGC 5 2 +GAAAGATTGTGCTTACCT GAAAGATTGTACTTACCT 8 3 +GAAAGCGTACGTTAACCA GAAAGCGTACATTAACCA 8 1 +GAACACTAAGATGGAAAT 8 +GAACACTAAGTGGTATGA 7 +GAACGCCATTATTTCCGG 8 +GAACGCCATTCATCGCAG 11 +GAACGCCATTTCTCAACC 13 +GAACTGCCGTGTCAATAC GAACTGCCGTTTCAATAC 11 1 +GAAGGAAGACCAGATTAC 6 +GAAGGCAGTTATATGCAT 9 +GAAGGCAGTTCAGTTTGC 10 +GAAGTTTAGAAGGCAACG 11 +GAATTCCTTGATTGGGCC 6 +GAATTCCTTGTGACCCTC GAATTCCTTGAGACCCTC 7 1 +GACACGGACTACCTTGCC 7 +GACACGGACTCACGGACT GACACGGACTCGCGGACT 9 1 +GACACGGACTCCGCAACT 6 +GACACGGACTTTAAGCGT 5 +GACAGTCCCTTAACCATC 6 +GACATTTGTTGAATACGC GACATTTGTTGAATACGA 13 1 +GACATTTGTTTTTAACAG 7 +GACCACATTATGCAAGGG 9 +GACCCGAATGAATGACCC 8 +GACCCGAATGTGACCAGT 7 +GACCTACTAGCCTACTAG GACCTACTAGCCTACTAA 11 1 +GACCTACTAGGGTGGGAT GACCTACTAGAGTGGGAT 11 1 +GACCTACTAGTGCGTATC 6 +GACGAATAATAAGATTGT 5 +GACGACTCCTTGACCCTC TACGACTCCTTGACCCTC 5 1 +GACGACTCCTTGGTTTCT 5 +GACGAGATGTCACGGACT 5 +GACGAGATGTGGATAAAG 6 +GACGAGATGTGTTTGTTT GACGCGATGTGTTTGTTT 15 1 +GACGAGATGTTCGGTTTA 8 +GACGAGATGTTGCGTATC 5 +GACGCTAGTCTGAGGTCT 9 +GACGTCAGCACGTACCTA 5 +GACTGTCTGGCTCTTGAC GACTGTCTGGCTCTTGAA 8 1 +GACTGTCTGGTACAGCCG 6 +GACTTACTCCTCCGTCCA 9 +GACTTCTTCGATCCCACG 6 +GAGAAGGCTTACATCTAT 7 +GAGAAGGCTTGAATACGC GAGAAGGCTTAAATACGC 7 1 +GAGAATACGCCGGCACAT 5 +GAGAATTCGTATGGGCAC 6 +GAGACAGATAAGTTTAGA 6 +GAGAGAATTGATATGCAT 7 +GAGAGAATTGATGGAAAT GAGAGAATTGATAGAAAT 6 1 +GAGAGAATTGGATCGTTT 5 +GAGAGAATTGGCATGGGT 8 +GAGAGAATTGTACTTGTG 5 +GAGAGAGTATATTCGACG 6 +GAGAGCCCATTTTGTGTC 8 +GAGATCTCGGCAATTAGT 5 +GAGATCTCGGTGTAGTTT GAGGTCTCGGTGTAGTTT 9 1 +GAGATTGCGACTAGTAAC 5 +GAGATTGCGAGTTACGAT GAGATTGCGAATTACGAT 5 1 +GAGCGTTGCTATCAAGTG 8 +GAGCGTTGCTATTCCTTG 9 +GAGGCGTTAGCTTCAGGT 5 +GAGGGAACCTTGCCATCG GAGGAAACCTTGCCATCG 6 1 +GAGGTCCCTTGAGTTTCG 8 +GAGTCCAAAGATGGATTA 5 
+GAGTCCAAAGCGAATAAT GAGTCCAAAGCGAATAAA,TAGTCCAAAGCGAATAAT 6 1,1 +GAGTCCAAAGGGGAGGTA 5 +GAGTCCAAAGTAGCCTCG 8 +GAGTCCAAAGTGCCTCAC GAGTCCAAAGTGCCTGAC 6 1 +GAGTTTCGCAAGAGGG 5 +GAGTTTCGGTAATCCC 10 +GATAAGACGGCTTACTCC 7 +GATAAGACGGTAACCCGT 7 +GATACAAACTATGGAAAT 5 +GATACAAACTGTAACGTT 7 +GATAGTGTTTACACTAAG 5 +GATCCGTCCACGCTAATA GATCCGCCCACGCTAATA 5 1 +GATCGGTACGCCAGCAGT GATCGGTACGCCAGCAGG 8 1 +GATCGGTTTATTCGGCCT GATCGATTTATTCGGCCT 7 1 +GATCTCAACCTGGCCTGT GATCTCAACCTGACCTGT 6 1 +GATCTGTGGTGGGTTGGT 5 +GATCTGTGGTTTTGTGTC 7 +GATGACCCTCGGCCCTTA 6 +GATGAGGTCTAGAAGTCC 6 +GATGAGGTCTTGACCCTC 7 +GATGAGGTCTTTGTTTAC 7 +GATGAGGTCTTTTAACAG 6 +GATGGGATTCCGGCACAT GATGGGATTCCGTCACAT 5 1 +GATGGGATTCTCGGTTTA 7 +GATGGTTTCTTTGAGGGT GATGGTTTCTTTGAGGTT 5 1 +GATGTAAAGGCCCTTCAG 7 +GATGTACACGCATCTCCC 7 +GATGTAGTTTCAGATTAC 12 +GATGTAGTTTCTTACTCC 8 +GATGTGAGCGAATAAT 7 +GATGTGAGGCAAAGCC GATGTGAGGCAAGGCC 11 1 +GATTAGACTAACTACT 10 +GATTAGACTGGCCTGT GATTAGACAGGCCTGT 6 1 +GATTCCGAGTATGGGCAC 5 +GATTCCGAGTCTTTCTTT 11 +GATTGAGGGTAACCTGAC 12 +GATTGAGGGTTAGCCTCG GATTGAGGGTTAGCTTCG 9 1 +GATTGGCGTTGGTCCCTT 5 +GATTGGCGTTTAGTCTAG 7 +GATTTAACAGAACCTGAC GATTTAAAAGAACCTGAC 5 1 +GATTTAACAGGATGTGAG 8 +GATTTAACAGTAACTACT 13 +GATTTAACAGTCATATGG 6 +GATTTAACAGTGCCTCAC GATTTAACAGTACCTCAC 5 1 +GCAGCATGACACTAAG GCAGCATGACACTCAG 6 1 +GCAGCATGATGGGCAC CCAGCATGATGGGCAC 7 1 +GCTTACCTCTACTTGT 8 +GGAAACAGTCCCAATC 6 +GGAGAAGCCCACATTA 5 +GGATAAAGAGAGACTA 5 +GGCATGCTTTCCGCTC 6 +GTCTCTCTAACTTAGC GTCTCTCTAACGTAGC 7 1 +GTCTCTCTATTTCCGG GTCTCTCTATTTCGGG 11 1 +GTCTCTCTCGAATAAT 8 +GTCTCTCTGGTCTGAC 5 +GTCTCTCTTGCGACAG 7 +GTCTCTCTTTGTGACT 8 +GTCTCTCTTTTATCAC 10 +GTGTAACCTGCGACAG 5 +GTTCAACTCCGTGTTT GTTCAACTCCGTTTTT 6 1 +GTTCAACTTACAGCCG 5 +GTTCAACTTGACCAGT 7 +GTTCAACTTGATGCCC 15 +GTTGCACGCGATTGAT 5 +GTTGCACGTGTAGTTT GTTGCACGTGTAGTTG 7 1 +GTTTGTTTCTCCGCAT 9 +GTTTGTTTGGAAACAG 5 +TAACTACTAGGCAACG 12 +TAACTACTTTAAGCGT 5 +TACAGCCGACCTTGCC 10 +TACAGCCGCCCATCTG TACAGCCGCCCCTCTG 6 1 +TACAGCCGGTTGCACG TACAGCCGGTTGCACA 11 1 +TACAGCCGTCGGTACG 7 
+TACTTGTGTACAGCCG 8 +TACTTGTGTGTTCCAG TACTTGTGTATTCCAG 10 1 +TAGCTTATCGGCACAT 7 +TAGTCTAGCGACGTCA TAGTCTAGCGACGTCC 6 1 +TAGTCTAGTTATAGCC 5 +TAGTCTAGTTGATCTA 7 +TCATATGGTCTGCATC TCATATGGACTGCATC 5 2 +TCATCCTTCGTATTTC 10 +TCATCCTTTCCAGTCC TCATCCTTTCTAGTCC 6 1 +TCATCCTTTTCCGCTC TTATCCTTTTCCGCTC 8 1 +TCCTTATTCAGTCCCT 10 +TCCTTATTTAGTCTAG 6 +TCGAAGCTTAACCATC TCGAAGCTAAACCATC 7 1 +TCGAAGCTTGCCTCAC 8 +TCGAAGCTTTCCGAGT 6 +TGAAAACTGGTAACTTAGC 8 +TGAAAACTGGTCCCATCTG TGAAAACTGGTCCCATCTA 8 1 +TGAAAACTGGTCTTTAATC 6 +TGAAACAATCCATATGCAT 12 +TGAAACACACGCTTTCTTT TGAAACACACGCTTTCTTA 16 1 +TGAAACCCTTGAATGTTTG 6 +TGAAACCCTTGCAATTAGT 9 +TGAACATCTATGAGCCCAT TGAACATCTATAAGCCCAT,TGAACATCTATGAGCACAT 8 1,1 +TGAACGCTCAGTGACCCTC TGAACGCTCAGTGACTCTC 12 1 +TGAAGAAGTTAACTTCAAT 5 +TGAAGAAGTTACACAAGGC TGAAGAAGTTATACAAGGC 5 1 +TGAAGCGTAGGCCTACTAG 9 +TGAAGGAGCTTTCGAAGCT 7 +TGAAGGTTGTGAGGCAGTT 6 +TGAAGGTTGTGGCAGTCGA 8 +TGAAGGTTGTGGCTTTGGC TGAAGGTTGTGGCTTTTGC 5 1 +TGAAGGTTGTGGGAAACAG TGAAGGTTGTGAGAAACAG 7 1 +TGAATACCCAGTCGGTACG TGAATACCCAGTCGGTATG 11 1 +TGAATGAAGCACATTTGTT 6 +TGAATGACTTTGAATACGC 9 +TGAATGACTTTTAACTACT 9 +TGAATGCATGGCATCGCAG 5 +TGAATGCATGGCATCTCCC 10 +TGAATTCGACGGCGTTGCT 10 +TGACAACAAATTAGTGTTT TGAAAACAAATTAGTGTTT 7 1 +TGACAAGTCATATTCCTTG TGACAAGTCATTTTCCTTG 6 1 +TGACAAGTCATTTAAGCGT 7 +TGACAATACTTATTCCTTG 16 +TGACACAACAGGAGCGGTA 7 +TGACACAACAGTGATGCCC 6 +TGACACAGTTTATATCTTC 9 +TGACACAGTTTTGTACACG 9 +TGACACTTTCTAGCACCAC 5 +TGACACTTTCTGACGATGG 9 +TGACAGACCATGGTCCCTT TGACAGACCATAGTCCCTT 10 1 +TGACATACATTTACAAACT 10 +TGACCAGCAGTCCACATTA 6 +TGACCAGTGTCAAAGA 7 +TGACCATTACTCTTACTCC 11 +TGACCATTACTGTCCAAAG TGACCATTACTTTCCAAAG 11 1 +TGACCTCGTCTAGGAAGAC 7 +TGACCTCGTCTCGAGATGT 9 +TGACCTCGTCTTGCTATTT TGACCTCATCTTGCTATTT,TGACCTCGTCTTCCTATTT,TGACCTCGTCTTGCTATTC 6 1,1,1 +TGACCTCGTCTTTCACATA 11 +TGACGTAGAAGCAACAAAT 9 +TGACGTAGAAGTTATAGCC 9 +TGACTACCGTTTTTGGGAG 6 +TGACTACGAGCCCTACTAG 7 +TGACTCAGAATCATCTCCC 10 +TGACTCAGAATTTATAGCC 6 +TGACTGTTCTTACTTCAAT TGACTGTCCTTACTTCAAT 8 1 +TGAGAAGCACTCCCATCTG 
TGAGAAGCACTCCCATGTG 11 1 +TGAGAAGCACTGTTTGTTT 6 +TGAGACAAAGGATTTCCGG 8 +TGAGACAACTTCAGTTTGC 8 +TGAGAGTTCACTCGACACC 13 +TGAGCAGGGTATTTGTGTC TGAGTAGGGTATTTGTGTC 13 1 +TGAGCCGGATTATCCCACG 8 +TGAGCGCATTCCCTATTCA TGAGCGCATTCGCTATTCA 6 1 +TGAGCGCATTCCGTCAGCA 8 +TGAGCGCATTCTCGGTACG 11 +TGAGGAATAGAGGCATGCT 6 +TGAGGAATAGATAGCTTAT 7 +TGAGGTTTCTCAGCACCAC TGAGGTTTCTCAGCACCAT 10 1 +TGAGGTTTCTCGATTGCGA TGAGGTTTCTCAATTGCGA 5 2 +TGAGTACACTCGAATACGC 6 +TGAGTCCATGTCAAGAGGG 11 +TGAGTCCATGTGGCGGTTT TGAGTCCATGTGGTGGTTT 5 1 +TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC 11 1 +TGAGTCCGTACGTTGTCAT TGAGTCCGTACATTGTCAT 5 3 +TGAGTCGTCGTGAGCCCAT 5 +TGAGTGGAGCTATGACCGA 9 +TGAGTGGAGCTCCCAAGCA 7 +TGAGTGGAGCTCTTCTTCG 7 +TGAGTGGAGCTGCATGGGT TGAGTGGAGCTACATGGGT,TGAGTGGCGCTGCATGGGT 6 2,1 +TGAGTGGAGCTTCCCAATC 12 +TGAGTGGAGCTTGCAAGGG 11 +TGAGTGGAGCTTTTAACAG 5 +TGAGTTCTGTTTCGACACC TGAGTTCTGTTTCGACACT 5 1 +TGATAACCCGTATTCAGTA 11 +TGATAACCCGTCCTATTCA 8 +TGATAACCCGTTTCGGCCT 14 +TGATACGTGCTAGTGAAAG 7 +TGATACGTGCTCGAGATGT 5 +TGATACGTGCTGTTCAACT 10 +TGATAGAAATGGGTGGGAT TGATAGAAATGGGTGGAAT 6 1 +TGATAGAAATGTGACCCTC TGATAGAAATGAGACCCTC,TGATAGAAATGTGACCCTT 10 2,1 +TGATATTGCCTAAATCAGA 14 +TGATCACCGAGCGCTAATA 7 +TGATCACCGAGTAGTCTAG 5 +TGATCCCGTAGCCGCTGTT TGATCCCGTAGGCGCTGTT 10 1 +TGATCCCGTAGGCTCTAGT TGATCCCGTAGACTCTAGT 7 1 +TGATCCCGTAGTTGCATAT TGATCCCGTCGTTGCATAT 13 1 +TGATCGTCATGTGCGACAG 6 +TGATGTCTTTCGGTGGGAT 7 +TGATGTTTGAGACACTAAG TGATGTTTGAGGCACTAAG 9 1 +TGATTCGCTGGCGAATAAT 7 +TGATTCGCTGGGCATGGGT TGATTCGCTGGACATGGGT 5 1 +TGATTCGCTGGTTAAACTG 11 +TGATTGCACGCTGTGCTCA 8 +TGATTGTCGCCCGGCACAT 6 +TGATTTACCCTAGAGACTA TGATTTACCCTAGAAACTA 12 1 +TGATTTACCCTGAATACGC 7 +TGCCTCACCTTCTGGA 5 +TGCGACAGCTTTCTTT 5 +TGCGACAGGGCGTTAG 6 +TGGCTACCTAGCTTAT 9 +TGGCTACCTGCCATCG 7 +TGGTATGAAGAGACTA 8 +TTAAGCGTGACAAAGG 8 +TTAAGCGTGAGCCCAT 6 +TTATCTGTATGGGCAC 8 +TTATCTGTCCCGAATG TTATCTGTCCCGAATA 6 1 +TTATCTGTCGAATAAT 10 +TTATCTGTTTATCTGT 7 +TTGCATATAGGCAACG 5 +TTGCATATCGAATAAT 10 +TTGGACTTGAGTTTCG 15 +TTTGTGTCCAGATTAC 8 diff --git 
a/tests/indrop_whitelist_ed_above_thres_corr.tsv b/tests/indrop_whitelist_ed_above_thres_corr.tsv index fd4c1466..0bbbb17a 100644 --- a/tests/indrop_whitelist_ed_above_thres_corr.tsv +++ b/tests/indrop_whitelist_ed_above_thres_corr.tsv @@ -1,25 +1,36 @@ +AAAACCTCCCGACTCCT AAAACCTCCCGACACCT 3 1 +AAAACCTCCTGTACACG 3 AAAACGCCTATGACTTT 5 AAAACTCGACATCGCAG 5 AAAAGTCGGTCCTTATT 6 AAAATCGTTGCTCTAGT AAAATCGTTACTCTAGT 6 1 +AAAATCGTTGGTGGGAT AAAATCGTTAGTGGGAT 4 1 AAAGGTAATACACTAAG 10 AAAGTAATCAGAACGGG 7 +AAAGTAATCCGGCACAT AAAGTAATCAGGCACAT 3 1 AAATCGGGTCACGGACT 7 AAATCGGGTGGCGTTAG AATGCGGATGGCGTTAG 11 1 AAATGAATGTGTAGTTT AAATGAATGTGTGGTTT 9 1 +AAATGACCCGAGCGGTA AAATGACCCAAGCGGTA 4 1 +AAATGACCCTAACCATC 3 AACCCATATTAACCATC 15 +AACGAAACGATGGAAAT 3 +AACGAAACGGAGCCCAT 3 AACGAAACGGTTCAACT 15 AACTCACCGAGGAAGAC 9 AACTCACCGCCTATTCA 9 +AACTCACCGTGACCCTC AACTCACCGTGACGGAC 4 1 AACTGAGTTTGCCTCAC 7 +AACTTAGCGTCCAAAG 4 AAGACACCACTAGATTG AAAACGCCTCTAGATTG 7 1 -AAGATGGCTATGGGCAC 11 AAGATGGCTGATCTCGG 6 AAGATGGCTTGATGCCC 7 AAGATGGCTTGGCTACC AAGATGGCTAGGCTACC 11 1 AAGCGAAGTATTCAGTA 10 AAGCGAAGTCTCCGCAT 8 AAGCGAAGTTCACCGAG 5 +AAGCGCCTTGCTTTGGC AAGCGCCTTACTTTGGC 4 1 +AAGCGCCTTGGTGGGAT 3 AAGCGCCTTTGTTTGAG 6 AAGCTTCTGCTTACCT 7 AAGTGAAAGATATGCAT AAGTGAAAGTCATGCAT 5 1 @@ -27,59 +38,87 @@ AATACTCTTAATTCCCA 13 AATACTCTTACACTAAG 10 AATACTCTTCACAAGGC AATACTCTTGACAAGGC 5 1 AATATACCTAGGCAGTT 8 +AATATCTTCATGACTTT 3 +AATATCTTCGGGAAATC AATATCTTCAGGAAATC 3 1 AATCCCACGATCAGCGC 11 AATCCCACGCACGGACT 9 +AATGCGGATTCCAGTCC AATGCGGATTCCAGTCA,AATGCGGATTCCTCCCC 3 1,1 AATGTTTGCCCTTCAG 6 AATGTTTGGGGAACCT AATGTTTGAGGAACCT 5 2 +ACAGATTACCGAACGTA 4 +ACAGATTACCTTACTCC 3 ACAGGTGTCAAATCAGA 9 +ACAGGTGTCCCCAAGCA 3 +ACCATTTGAGTTGTCAT ACCCTTGGTGTTGTCAT 4 1 ACCATTTGATCCCAATC ACCCTTGGTTCCCAATC 7 1 ACCCACGAGGGTTGGT ACCCACGAAGGTTGGT 9 1 +ACCCACGATAACTACT 4 +ACCCGACTTGCAAGGAC ACCCGACTTACAAGGAC,ACCCGACTTGCAAGGGA 9 1,1 ACCCGACTTGTTGTCAT ACCCGACTTATTGTCAT 6 1 -ACCCGACTTTGCAAGGG 9 ACCCTAACCATGGATTA ACCGCAACTATGGATTA 10 1 ACCCTTGGTAAGCGTAC 5 
+ACCCTTGGTATGTTGGC 4 +ACCGCAACTAGAGGTGG 4 ACCGTGTTTTGGTTTCT ACCGTGTTTTAGTGTTT,ACCGTGTTTTGAGGTCT 10 1,1 ACCTTCTTACTTCAAT 6 ACCTTCTTCTTTCTTT TCATCCTTCTTTCTTT 10 1 ACCTTCTTTTGGATCG ACCTTCTTTTGGCTTG,CCCATCTGTTGGATCG 12 1,3 ACGCTCTCAACTAGCCA ACGCTCTCAGTTAACCA 6 1 ACGTGCTAGTCGGTTTA 17 +ACTACTTGTTTCCGAGT 4 ACTAGATTGACTAGGAT 11 ACTAGATTGTCGGTACG ACTAGATTGTCGGTAGC 14 1 +ACTGCGTTGAAGCTTCT 3 ACTTAGGTAAGTGATGC 7 ACTTCAATCTTTAATC ACTTCAATCTTTTTTT 5 1 ACTTCAATTGGGATTC 14 +AGAACGATTTGTAAAGG 3 AGAGACTAACTGCCGT 8 +AGAGACTACCGCTGTT 3 AGAGACTATGAAATGA AGAGACTATGTAATGA 11 1 AGAGACTATGACGGAC 10 AGAGGTGGCCCATCTG 9 AGAGGTGGCGACGTCA 5 AGATGTATTAAGCTTCT AGATGTATTACGCTTCT 7 1 AGATGTATTCCACATTA 8 +AGATGTATTCGATTGAT AGATGTATTCCCTTGGT 3 1 AGATGTATTTGTACACG 10 AGCAACCTGCGGCTTAC 8 +AGCACCACCATCTCCC 4 AGCACCTCTTACCAGGC 6 +AGCCAAGATGTTGTCAT AGCCAAGATGTTGTATC 4 1 AGCCAAGATTGAGGTCT 13 AGCCTCTTTAATTCCCA AGCCTCTTTCAGTCCCT 10 8 +AGCTTTCCATACAGCCG 4 AGGCAACGTCATATGG 13 AGGCAACGTTTAACAG 9 AGGCCTAAGGGTGGGAT 5 AGGGAACGACTTCGCAC 9 AGGGAACGATTGCATAT 9 AGGGAAGGTGGCGTTAG CGGGAAGGTGGCGTTAG 9 1 +AGGGAAGGTTACTTGTG 3 AGGGCCAATTCTCACTT AGGGCCAATTCACGTTT 5 1 AGGGTGTATCAGTTTGC 8 +AGGGTGTATCTTGGTGT 4 AGGGTGTATGACAGATA AGGGCCAATGACAGATA,CGGGTGTATGACAGATA 5 1,1 +AGGTGACACCGAGATGT 4 AGGTTAGTGAAGGTAAT 10 AGGTTAGTGTCGAAGCT AGGTTAGTGACCACGCT,CGGTTAGTGTCGAAGCT 6 3,1 +AGTATGAGTAGAGGTGG 4 AGTATGAGTCCCATCTG AGTATGAGTCCCGACTT 5 1 AGTATGAGTTGCCTCAC AGTATGAGTAGCCTCAC 7 1 AGTCAAAGACCTGACAC AAGCAAAGTCCTGACAC 7 1 AGTCAATACACTGCCGT AGCCAAGATACTGCCGT,CGTCAATACACTGCCGT 5 1,1 +AGTCAATACTTGCATAT 4 +AGTGGATGGGAGAGTAT 3 AGTGTCGGACGGGCTTT 6 AGTTTACGTACTAGGAT 5 +AGTTTACGTCCCTAACC 4 +AGTTTACGTTGACGGAC AGTTTACGTTGACCCTC 3 1 +AGTTTACGTTTAAACTG 4 ATAGTCGCAATCAAGTG 6 ATAGTGGACCGAGATGT ATAGTGGACGGAGATGT 6 1 +ATATGCATGTACCTTG 4 ATCACGTTTAATGACCC ATCACGTTTACTGACCC,ATCACGTTTCCTGACAC 10 1,1 ATCACGTTTCCACATTA 8 ATCACGTTTCGAATAAT 5 @@ -88,29 +127,40 @@ ATCCGCTATGTAAAGG 6 ATCGGTTCGAACCTGAC 11 ATCGGTTCGCAAGTCAT 6 ATCGGTTCGGATTAGAC 
ATCGGTTCGGATTAACA,ATCGGTTCGTTTTAGAC 10 1,1 -ATCTTTGTGTCCATGT ATCTTTGTATCCATGT,ATCTTTGTCTTCAGGT 6 1,3 +ATCTGCATCAGCAGAAC GTCTGCATCAGCAGAAC 4 1 +ATCTTTGTGTCCATGT ATCTTTGTCTTCAGGT 6 3 ATCTTTGTTAACCATC ATCTTTGTTGACCAGT 7 1 ATGCTCCGTGTTACGAT 9 +ATGCTCCGTTTATCTGT 4 ATGGCCTGTAATGTTTG ATGCTCCGTAATGTTTG 9 1 ATGGCCTGTAGCACCAC 7 +ATGGCCTGTGTTGTCAT 4 +ATGGCCTGTTGACGGAC 3 ATGTGTCCTATGGAAAT ATGTGTCCTGTGGAAAT 13 1 ATGTGTCCTGCTTTGGC ATATGTCCTGCTTTGGC 6 1 ATGTGTCCTTAACCATC 6 ATTCCAGACTTGTGACT ATTCCAGACTTGAGGGT 7 1 +ATTGATTCTATCAAGTG ATTGATTCTGTCAAGTG 3 1 +ATTGATTCTGTTAACCA ATTGATTCTATTAACCA,TTTGATTCTGTTAACCA 4 1,1 ATTGATTCTTGTAAAGG ATTGATTCTTGTAAAGA 6 1 +ATTGGATCGATGTCGGA ATTGGATCGGTGTCGGA 4 3 ATTGTGACTTCCGTCCA 6 ATTTCCGGCGAATAAT ATTTCCGGCAATTAGT,CTTTCCGGCGAATAAT 8 2,1 ATTTCCGGTACTTGTG 9 CAGTTTGCGGCGTTAG CAGTTTACGGCGTTAG 7 1 CATCGCAGCGACTCCT 5 CATCGCAGCTTCAGGT 6 +CATCGCAGTAGCCTCG 4 CATCGCAGTTATAGCC 14 CCAACCGTCCACATTA 7 CCAACCGTCGGGCTTT 9 CCAACCGTGTTCAACT 12 CCAACCGTTAGTCTAG 7 +CCCATCTGTCTGTGGT CCCATCTGTCCGTGGT,CCCATCTGTCTGTGAT 4 1,1 CCGCTGTTATCAGCGC 8 +CCGCTGTTTGCAAGGG CCGCTGTTCGCAAGGG,CCGCTGTTTGCAAAGG,CCGCTGTTTGCCATCG 4 1,1,1 CGAACGTAATACTCTT CAAACGTAATACTCTT 7 1 +CGAACGTACATTTGTT 4 CGAACGTAGGCGTTAG CGAACGTAGGAGTTAG 9 1 CGAACGTATCCCAATC CGAACGTATGCGTATC 10 1 CGCTAATAGCATGGGT 5 @@ -119,9 +169,13 @@ CGGCTTACAGAGGTGG 6 CGTATTTCTTGCATAT CGTAATTCTTGCATAT 9 1 CTATAGAGCGAACGTA 5 CTATAGAGTGCCATCG CTATAGAGTGCTATTT 6 1 +CTATAGAGTTTGTGTC CTATAGAGTTTGTGTA 4 1 +CTTACGGGCCCATCTG 4 +CTTACGGGCGGCACAT 4 CTTACGGGTAGCTTAT CTTCAGGTTAGCTTAT 10 9 CTTCAGGTGGATAAAG 9 CTTCGATTCTCGCGTA 6 +CTTCGATTGAAGTGCC 4 CTTCGATTGCGTTGCT CTTCGATTGAGTTTCG,CTTCGATTTCGTTGCT 7 1,1 CTTCGATTTAAATAGG CTTCGATTTAAATAGT 11 1 CTTTAATCCGCTCTCA 6 @@ -131,78 +185,111 @@ CTTTCTTTGGAGAAGC CTTTCTTTAGAGAAGC 5 2 GAAAGATTGTGCTTACCT GAAAGATTGTACTTACCT 8 3 GAAAGCGTACGTTAACCA GAAAGCGTACATTAACCA 8 1 GAACACTAAGATGGAAAT GAACACTAAGTTGGACTT 8 3 +GAACACTAAGCCCAAGCA 3 GAACACTAAGTGGTATGA 7 GAACGCCATTATTTCCGG 8 GAACGCCATTCATCGCAG 11 
GAACGCCATTTCTCAACC 13 +GAACTAGGATTGCAAGGG 4 GAACTGCCGTGTCAATAC GAACTGCCGTTTCAATAC 11 1 GAAGGAAGACCAGATTAC 6 GAAGGCAGTTATATGCAT 9 GAAGGCAGTTCAGTTTGC 10 GAAGTTTAGAAGGCAACG 11 +GAATCAAGTGGGATAAAG 4 GAATTCCTTGATTGGGCC 6 GAATTCCTTGTGACCCTC GAATTCCTTGAGACCCTC 7 1 +GACAATTAGTCTATAGAG GACAATTAGTCTATAAAG 3 1 GACACGGACTACCTTGCC 7 GACACGGACTCACGGACT GACACGGACTCGCGGACT,GATACAAACTCACGGACT 9 1,3 +GACACGGACTCGGGCTTT 3 GACACGGACTTTAAGCGT 5 GACAGTCCCTTAACCATC 6 GACATTTGTTGAATACGC GACATTTGTTGAATACGA 13 1 GACATTTGTTTTTAACAG 7 +GACCACATTACAATTAGT 3 GACCACATTATGCAAGGG 9 GACCCGAATGAATGACCC 8 +GACCCGAATGGCATGGGT 3 GACCCGAATGTGACCAGT 7 +GACCCTTCAGATTCCTTG GACTCTTCAGATTCCTTG 4 1 GACCTACTAGCCTACTAG GACCTACTAGCCTACTAA 11 1 GACCTACTAGGGTGGGAT GACCTACTAGAGTGGGAT 11 1 GACCTACTAGTGCGTATC 6 +GACCTACTAGTTCACATA GACCTATTAGTTCACATC 4 1 +GACCTGACACTAACCATC 4 GACGAATAATAAGATTGT 5 +GACGACTCCTGTTAACCA 4 GACGACTCCTTGACCCTC TACGACTCCTTGACCCTC 5 1 GACGACTCCTTGGTTTCT 5 GACGAGATGTCACGGACT 5 +GACGAGATGTGAGGCTGA GACGAGATGTAAGGCTGA 4 1 GACGAGATGTGGATAAAG 6 GACGAGATGTGTTTGTTT GACGCGATGTGTTTGTTT 15 1 GACGAGATGTTCGGTTTA 8 GACGAGATGTTGCGTATC GATGAGAAGTTGCGTAAC 5 1 +GACGCTAGTCGGCATGCT GACGCTAGTCAGCATGCT,GACGCTAGTCGTCATATT 3 1,1 GACGCTAGTCTGAGGTCT 9 GACGTCAGCACGTACCTA 5 +GACTCTTGACGCTTTGGC GACTCTTGACACTTTGGC 3 1 GACTGTCTGGCTCTTGAC GACTGTCTGGCTCTTGAA 8 1 GACTGTCTGGTACAGCCG 6 +GACTTACTCCATTACGAT 3 GACTTACTCCTCCGTCCA 9 +GACTTCTTCGAGGCAGTT GACTTCTTCGCCGCTGTT 4 1 GACTTCTTCGATCCCACG 6 GAGAAGGCTTACATCTAT 7 GAGAAGGCTTGAATACGC GAGAAGGCTTAAATACGC 7 1 +GAGAAGGCTTTCCAGTCC 3 GAGAATACGCCGGCACAT 5 GAGAATTCGTATGGGCAC 6 +GAGAATTCGTGAGAATTG 4 GAGACAGATAAGTTTAGA 6 +GAGACAGATATACAGCCG GAGACAGATATACAGCCA 4 1 +GAGACGATGGCCTGTTAT 4 GAGAGAATTGATATGCAT 7 GAGAGAATTGATGGAAAT 6 GAGAGAATTGGATCGTTT 5 GAGAGAATTGGCATGGGT 8 GAGAGAATTGTACTTGTG GAGAGAAATGTACCAGTG 5 1 GAGAGAGTATATTCGACG GAGAGAGTATATTCCTTG 6 1 +GAGAGAGTATGCGTTGCT 3 +GAGAGCCCATGTAACGTT 4 GAGAGCCCATTTTGTGTC 8 GAGATCTCGGCAATTAGT 5 +GAGATCTCGGGCATGGGT 3 GAGATCTCGGTGTAGTTT 
GAGATCTCGGTGTATTTG,GAGGTCTCGGTGTAGTTT 9 1,1 GAGATTGCGACTAGTAAC 5 GAGATTGCGAGTTACGAT GAGATCTCGGGTTACGAT,GAGATTGCGAATTACGAT 5 1,1 GAGCGTTGCTATCAAGTG 8 GAGCGTTGCTATTCCTTG 9 GAGGCGTTAGCTTCAGGT 5 +GAGGCGTTAGTTGATTCT 3 GAGGGAACCTTGCCATCG GAGGAAACCTTGCCATCG 6 1 +GAGGGAGGTACTAGTAAC 3 GAGGTCCCTTGAGTTTCG 8 +GAGGTCCCTTTACTTGTG 3 GAGTCCAAAGATGGATTA 5 GAGTCCAAAGCGAATAAT GAGTCCAAAGCGAATAAA,TAGTCCAAAGCGAATAAT 6 1,1 GAGTCCAAAGGGGAGGTA 5 GAGTCCAAAGTAGCCTCG GAGTCCAAAGTAGTCGCA 8 1 +GAGTTAACCAGATCTCGG GAGTTAACCAAATCTCGG 3 1 GAGTTTCGCAAGAGGG GAGTTTCGCGAGATGT 5 1 GAGTTTCGGTAATCCC 10 +GATAACCATCTCGAAGCT 3 GATAAGACGGCTTACTCC 7 GATAAGACGGTAACCCGT 7 GATACAAACTATGGAAAT GATACAAACTATGAAATG 5 1 GATACAAACTGTAACGTT GATACAAACTGTTCTGTT 7 1 GATAGTGTTTACACTAAG 5 +GATAGTGTTTTACCAGGC 4 +GATCCCAATCAATGTTTG 3 GATCCGTCCACGCTAATA GATCCGCCCACGCTAATA 5 1 GATCGGTACGCCAGCAGT GATCGGTACGCCAGCAGG 8 1 +GATCGGTACGTTGATCTA GATCGGTACGTTGATTAA 4 1 GATCGGTTTATTCGGCCT GATCGATTTATTCGGCCT 7 1 +GATCGTTTGAGCGGTA GATCGTTTAAGCGGTA 4 1 +GATCTCAACCACTGAGTT 3 GATCTCAACCTGGCCTGT GATCTCAACCTAGCCTCG,GATCTCAACCTGACCTGT 6 3,1 GATCTGTGGTGGGTTGGT 5 GATCTGTGGTTTTGTGTC 7 @@ -211,20 +298,31 @@ GATGAGGTCTAGAAGTCC 6 GATGAGGTCTTGACCCTC GATGAGGTCTTTCCGCTC 7 1 GATGAGGTCTTTGTTTAC GATGAGGTCTTTGATTCT 7 1 GATGAGGTCTTTTAACAG 6 +GATGCCATCGCTTCAGGT 3 +GATGGAGGAGGATGTGAG GATGGAGGAGAATGTGAG 3 1 GATGGGATTCCGGCACAT GATGGGATTCCGTCACAT 5 1 GATGGGATTCTCGGTTTA 7 +GATGGTTTCTGCAAGGAC GATGGTTTCTACTAGGAT 4 1 +GATGGTTTCTTAGAAATG 4 GATGGTTTCTTTGAGGGT GATGGTTTCTTCGAAGCT,GATGGTTTCTTTGAGGTT 5 2,1 GATGTAAAGGCCCTTCAG 7 +GATGTAAAGGGGATAAAG 3 +GATGTACACGAAAGCCTA 4 GATGTACACGCATCTCCC 7 GATGTAGTTTCAGATTAC 12 +GATGTAGTTTCTGTCTGG 4 GATGTAGTTTCTTACTCC 8 +GATGTGAGAGGAAGAC 3 GATGTGAGCGAATAAT GATGTGAGCGAATATG 7 1 GATGTGAGGCAAAGCC GATGTGAGGCAAGGCC 11 1 +GATGTGAGTCGGTTTA GATGTGAGTCGATTTA,GATGTGAGTCGGTACG 4 1,3 +GATGTTCCAGATTGGGCC 4 GATTAGACTAACTACT 10 GATTAGACTGGCCTGT GATTAGACAGGCCTGT 6 1 GATTCCGAGTATGGGCAC 5 GATTCCGAGTCTTTCTTT 11 GATTGAGGGTAACCTGAC 12 +GATTGATCTAAGTTTAGA 4 
GATTGGCGTTGGTCCCTT GATTGGCGTTTGACCCTC 5 1 GATTGGCGTTTAGTCTAG 7 GATTTAACAGGATGTGAG 8 @@ -232,20 +330,28 @@ GATTTAACAGTAACTACT 13 GATTTAACAGTCATATGG 6 GCAGCATGACACTAAG GCAGCATGACACTCAG 6 1 GCAGCATGATGGGCAC CCAGCATGATGGGCAC 7 1 +GCAGCATGGATCTCGG GTTGCACGGATCTCGG 3 1 +GCATGGGTTAACCCGT GCATGGGTCCAACCGT 4 1 GCTTACCTCTACTTGT GCTTACCTCTTCTTCG 8 1 +GCTTTGGCACTAGCCA 4 +GGAAACAGACTAGGAT 3 GGAAACAGTCCCAATC GGAAACAGTACCAGGC,TGCGACAGTCCCAATC 6 3,1 GGAGAAGCCCACATTA 5 GGATAAAGAGAGACTA 5 GGCATGCTTTCCGCTC 6 +GGGTTGGTGTTCAACT GGGTTGGTGTTCAATC 4 1 GTCTCTCTAACTTAGC GTCTCTCTAACGTAGC 7 1 +GTCTCTCTATATGCAT 3 GTCTCTCTATTTCCGG GTCTCTCTAATTCCCA,GTCTCTCTATTTCGGG 11 1,1 GTCTCTCTTGCGACAG 7 GTGTAACCTGCGACAG 5 +GTTACGATATGGAAAT 4 GTTCAACTCCGTGTTT GTTCAACTCACAGTTT,GTTCAACTCCGTTTTT 6 1,1 GTTCAACTTACAGCCG GCTCTAGTTACAGCCG,GTTCAACTTACCGGCA 5 1,1 GTTCAACTTGACCAGT 7 GTTCAACTTGATGCCC 15 GTTGCACGCGATTGAT 5 +GTTGCACGGTCCGTAC 3 GTTGCACGTGTAGTTT GTTGCACGTGTAGTTG 7 1 GTTTGTTTCTCCGCAT GTTTGTTTTTCCGCTC 9 1 GTTTGTTTGGAAACAG 5 @@ -256,7 +362,11 @@ TACAGCCGGTTGCACG TACAGCCGGCAGCATG,TACAGCCGGTTGCACA 11 1,1 TACAGCCGTCGGTACG 7 TACTTGTGTACAGCCG 8 TACTTGTGTGTTCCAG TACTTGTGTATTCCAG 10 1 +TAGCCTCGCCGCTGTT TAGCCTCGCCGCTTTT 3 1 +TAGCCTCGGCAGTCGA 3 +TAGCCTCGTCGACGGT TAGCCTCGTCGAGTGC 4 1 TAGCTTATCGGCACAT 7 +TAGCTTATTGTAAAGG 3 TAGTCTAGCGACGTCA TAGTCTAGCGACGTCC 6 1 TAGTCTAGTTATAGCC 5 TAGTCTAGTTGATCTA AGGCCTAGTTGATCTA 7 1 @@ -267,14 +377,17 @@ TCATCCTTTTCCGCTC TCCTTATTTTCCGCTC,TTATCCTTTTCCGCTC 8 1,1 TCCTTATTCAGTCCCT 10 TCCTTATTTAGTCTAG 6 TCGAAGCTTGCCTCAC 8 -TCGAAGCTTTCCGAGT TCGAAGCTTTCGGCCT 6 3 +TCGAAGCTTTCCGAGT 6 TGAAAACTGGTAACTTAGC 8 TGAAAACTGGTCCCATCTG TGAAAACTGGTCCCATCTA 8 1 TGAAAACTGGTCTTTAATC 6 +TGAAAATGTCGGATTGCGA TGAAAATGTCGAATTGCGA 4 2 TGAAACAATCCATATGCAT 12 TGAAACACACGCTTTCTTT TGAAACACACGCCGTGTTT,TGAAACACACGCTTTAATC,TGAAACACACGCTTTCTTA 16 1,1,1 +TGAAACACACGTACCGGCA 3 TGAAACCCTTGAATGTTTG 6 TGAAACCCTTGCAATTAGT 9 +TGAAACCCTTGGGAGAAGC TGAAACCCTTGAGAGAAGC 4 1 TGAACATCTATGAGCCCAT TGAACATCTATAAGCCCAT,TGAACATCTATGAGCACAT 8 
1,1 TGAACGCTCAGTGACCCTC TGAACGCTCAGTGACCTCA,TGAACGCTCAGTGACTCTC 12 1,1 TGAAGAAGTTAACTTCAAT 5 @@ -284,41 +397,55 @@ TGAAGGAGCTTTCGAAGCT 7 TGAAGGTTGTGGCAGTCGA 8 TGAAGGTTGTGGCTTTGGC TGAAGGTTGTGGCTTTTGC 5 1 TGAAGGTTGTGGGAAACAG TGAAGGTTGTGAGAAACAG 7 1 +TGAATACCCAGGGAATAGA 3 TGAATACCCAGTCGGTACG TGAATACCCAGTCGGTATG 11 1 TGAATGAAGCACATTTGTT TGAATGAAGCACATTTTTA 6 1 -TGAATGACTTTGAATACGC TGAATGACTTTAACTACTC 9 3 +TGAATGAAGCAGAAAGACC GGAATGAAGCAGAAAGACC 3 1 +TGAATGACTTTACCTTCTT TGAATGACTTTTCAATCTT 3 1 +TGAATGACTTTGAATACGC 9 TGAATGCATGGCATCTCCC TGAATGCATGGCATCGCAG 10 5 TGAATTCGACGGCGTTGCT 10 TGACAACAAATTAGTGTTT TGAAAACAAATTAGTGTTT 7 1 TGACAAGTCATTTAAGCGT 7 TGACAATACTTATTCCTTG TGACAAGTCATATTCCTTG,TGACAATACTTATTTCCGG 16 6,1 TGACACAACAGGAGCGGTA 7 +TGACACAACAGGCCTCTTT 4 TGACACAACAGTGATGCCC 6 TGACACAGTTTATATCTTC TGACACAGTTTCTTTCTTT 9 1 TGACACAGTTTTGTACACG 9 TGACACTTTCTAGCACCAC 5 TGACACTTTCTGACGATGG 9 +TGACACTTTCTTAACTACT 3 +TGACAGACCATCCTATTCA TGACAGACCATCCTACTAG 4 2 TGACAGACCATGGTCCCTT TGACAGACCATAGTCCCTT 10 1 +TGACATACATTGAGTTTCG TGACATACATTAAGTTTCG 3 1 +TGACATACATTGGACTTCC TGACAGACCATGGACTTCC,TGACATACATTAGACTTCC 4 1,1 TGACATACATTTACAAACT TGACATACATTAACAATCC 10 1 TGACCAGCAGTCCACATTA TGACCATTACTCCACATTA 6 1 TGACCAGTGTCAAAGA 7 TGACCATTACTCTTACTCC 11 TGACCATTACTGTCCAAAG TGACCATTACTTTCCAAAG 11 1 +TGACCATTACTTGTAAAGG TGACCAGTACTTGTGT 3 1 TGACCTCGTCTAGGAAGAC 7 TGACCTCGTCTCGAGATGT 9 TGACCTCGTCTTGCTATTT TGACCTCATCTTGCTATTT,TGACCTCGTCTTCCTATTT,TGACCTCGTCTTGCTATTC 6 1,1,1 TGACCTCGTCTTTCACATA 11 TGACGTAGAAGCAACAAAT TGACGTAGAAGCGAATAAT 9 1 TGACGTAGAAGTTATAGCC TGACGTAGAAGTTATACCA,TGACTCAGAATTTATAGCC 9 1,6 +TGACGTATCGGGAAAGACC TGACGTATCGGGAAACTCC 3 1 TGACTACCGTTTTTGGGAG 6 TGACTACGAGCCCTACTAG 7 TGACTCAGAATCATCTCCC 10 TGACTGTTCTTACTTCAAT TGACTGTCCTTACTTCAAT 8 1 +TGACTTGGTGTAGAAGTCC 3 +TGACTTGGTGTTTAAGCGT 4 +TGAGAAGCACTCAGTCCCT TGAGAAGCATTCAGTCCCT 3 1 TGAGAAGCACTCCCATCTG TGAGAAGCACTCCCATGTG 11 1 TGAGAAGCACTGTTTGTTT 6 TGAGACAAAGGATTTCCGG TGACACAAGGCATTTCCGG 8 1 TGAGACAACTTCAGTTTGC 
TGAGACAACTTCGGCTTAC 8 2 TGAGAGTTCACTCGACACC 13 +TGAGATTTACCCTAGTAAC TGAGATTTACCGTAGTAAC 3 1 TGAGCAGGGTATTTGTGTC TGAGTAGGGTATTTGTGTC 13 1 TGAGCCGGATTATCCCACG 8 TGAGCGCATTCCCTATTCA TGAGCGCATTCGCTATTCA 6 1 @@ -326,54 +453,74 @@ TGAGCGCATTCCGTCAGCA 8 TGAGCGCATTCTCGGTACG 11 TGAGGAATAGAGGCATGCT 6 TGAGGAATAGATAGCTTAT TGAGGAATAGAGAGCCCAT 7 1 +TGAGGGAAATCGAGCCCAT 4 TGAGGTTTCTCAGCACCAC TGAGGTTTCTCAGCACCAT 10 1 TGAGGTTTCTCGATTGCGA TGAGGTTTCTCAATTGCGA 5 2 +TGAGTAATCCCTAGCCTCG TGAGTAATTCCTAGCCGCG 4 1 TGAGTACACTCGAATACGC 6 +TGAGTCCATGTAAACGCCT TGAGTCCATGTAAAAGACC 3 1 TGAGTCCATGTCAAGAGGG 11 +TGAGTCCATGTCGGCTTAC 4 TGAGTCCATGTGGCGGTTT TGAGTCCATGTGGTGGTTT 5 1 -TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC,TGAGTCCATGTTGGCTACC 11 1,1 +TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC 11 1 TGAGTCCGTACGTTGTCAT TGAGTCCGTACATTGTCAT 5 3 TGAGTCGTCGTGAGCCCAT TGAGTCGTCGTCAGTCCCT 5 1 TGAGTGGAGCTATGACCGA TGAGTGGAGCTATCAACGC 9 1 TGAGTGGAGCTCTTCTTCG TGAGTGGAGCTTTTGTTTG 7 1 +TGAGTGGAGCTGTTTGTTT 4 TGAGTGGAGCTTCCCAATC 12 TGAGTGGAGCTTGCAAGGG 11 TGAGTGGAGCTTTTAACAG 5 TGAGTTCTGTTTCGACACC TGAGTTCTGTTTCGACACT 5 1 TGATAACCCGTATTCAGTA 11 +TGATAACCCGTGTTAACCA 4 TGATAACCCGTTTCGGCCT 14 +TGATACGTGCTAATTCCCA 3 TGATACGTGCTAGTGAAAG TGATACGTGCTATGGAAAT 7 1 TGATACGTGCTCGAGATGT TGATACGTGCTCGATGTCT 5 1 TGATACGTGCTGTTCAACT 10 +TGATACGTGCTTTTACCCT 3 +TGATAGAAATGCCCGAATG 4 TGATAGAAATGGGTGGGAT TGATAGAAATGGGTGGAAT 6 1 TGATAGAAATGTGACCCTC TGATAGAAATGAGACCCTC,TGATAGAAATGTGACCCTT 10 2,1 TGATATTGCCTAAATCAGA 14 +TGATATTGCCTGGTGGGAT 3 TGATCACCGAGCGCTAATA 7 TGATCACCGAGTAGTCTAG 5 TGATCCCGTAGCCGCTGTT 10 TGATCCCGTAGTTGCATAT TGATCCCGTCGTTGCATAT 13 1 +TGATCGACGGTCCCAAGCA 3 +TGATCGTCATGTACGTGCT 3 TGATCGTCATGTGCGACAG 6 +TGATGCCCATATGCAT 3 TGATGTCTTTCGGTGGGAT TGATGTCTTTCACTAGGAT 7 1 TGATGTTTGAGACACTAAG TGATGTTTGAGGCACTAAG 9 1 +TGATTCGCTGGAAATCAGA 4 TGATTCGCTGGCGAATAAT TGATTCGCTGGCGAATATG 7 1 TGATTCGCTGGGCATGGGT TGATTCGCTGGACATGGGT 5 1 TGATTCGCTGGTTAAACTG 11 +TGATTGCACGCTAAGACGG 3 TGATTGCACGCTGTGCTCA 8 TGATTGTCGCCCGGCACAT TGATCGACACCCGGCACAT 6 1 
TGATTTACCCTAGAGACTA TGATTTACCCTAGAAACTA 12 1 TGATTTACCCTGAATACGC 7 +TGCAAGGGATGGATTA 4 +TGCAAGGGGAATACGC 4 TGCCTCACCTTCTGGA 5 TGCGACAGCTTTCTTT 5 TGCGACAGGGCGTTAG 6 -TGGCTACCTAGCTTAT TGGCTACCGAGCCCAT,TGGCTACCTGGTTTCT 9 4,2 +TGGCTACCTAGCTTAT TGGCTACCTGGTTTCT 9 2 TGGCTACCTGCCATCG 7 TGGTATGAAGAGACTA 8 TTAAGCGTGACAAAGG TTAAGCGTTACAAACT 8 1 TTAAGCGTGAGCCCAT 6 TTATCTGTATGGGCAC 8 TTATCTGTCGAATAAT GTCTCTCTCGAATAAT 10 8 +TTATCTGTTGACGGAC 4 TTATCTGTTTATCTGT 7 +TTATCTGTTTGAGGGT 4 TTGCATATAGGCAACG TTGGACTTAGGCAACG 5 3 TTGCATATCGAATAAT 10 TTGGACTTGAGTTTCG 15 TTTGTGTCCAGATTAC 8 +TTTGTGTCTGCAAGGG TTTGTGCCTGCAAGGG 4 1 diff --git a/tests/indrop_whitelist_ed_above_thres_disc.tsv b/tests/indrop_whitelist_ed_above_thres_disc.tsv index ae7a1c11..32ece5ea 100644 --- a/tests/indrop_whitelist_ed_above_thres_disc.tsv +++ b/tests/indrop_whitelist_ed_above_thres_disc.tsv @@ -1,25 +1,36 @@ +AAAACCTCCCGACTCCT AAAACCTCCCGACACCT 3 1 +AAAACCTCCTGTACACG 3 AAAACGCCTATGACTTT 5 AAAACTCGACATCGCAG 5 AAAAGTCGGTCCTTATT 6 AAAATCGTTGCTCTAGT AAAATCGTTACTCTAGT 6 1 +AAAATCGTTGGTGGGAT AAAATCGTTAGTGGGAT 4 1 AAAGGTAATACACTAAG 10 AAAGTAATCAGAACGGG 7 +AAAGTAATCCGGCACAT AAAGTAATCAGGCACAT 3 1 AAATCGGGTCACGGACT 7 AAATCGGGTGGCGTTAG AATGCGGATGGCGTTAG 11 1 AAATGAATGTGTAGTTT AAATGAATGTGTGGTTT 9 1 +AAATGACCCGAGCGGTA AAATGACCCAAGCGGTA 4 1 +AAATGACCCTAACCATC 3 AACCCATATTAACCATC 15 +AACGAAACGATGGAAAT 3 +AACGAAACGGAGCCCAT 3 AACGAAACGGTTCAACT 15 AACTCACCGAGGAAGAC 9 AACTCACCGCCTATTCA 9 +AACTCACCGTGACCCTC AACTCACCGTGACGGAC 4 1 AACTGAGTTTGCCTCAC 7 +AACTTAGCGTCCAAAG 4 AAGACACCACTAGATTG AAAACGCCTCTAGATTG 7 1 -AAGATGGCTATGGGCAC 11 AAGATGGCTGATCTCGG 6 AAGATGGCTTGATGCCC 7 AAGATGGCTTGGCTACC AAGATGGCTAGGCTACC 11 1 AAGCGAAGTATTCAGTA 10 AAGCGAAGTCTCCGCAT 8 AAGCGAAGTTCACCGAG 5 +AAGCGCCTTGCTTTGGC AAGCGCCTTACTTTGGC 4 1 +AAGCGCCTTGGTGGGAT 3 AAGCGCCTTTGTTTGAG 6 AAGCTTCTGCTTACCT 7 AAGTGAAAGATATGCAT AAGTGAAAGTCATGCAT 5 1 @@ -27,59 +38,87 @@ AATACTCTTAATTCCCA 13 AATACTCTTACACTAAG 10 AATACTCTTCACAAGGC AATACTCTTGACAAGGC 5 1 AATATACCTAGGCAGTT 8 
+AATATCTTCATGACTTT 3 +AATATCTTCGGGAAATC AATATCTTCAGGAAATC 3 1 AATCCCACGATCAGCGC 11 AATCCCACGCACGGACT 9 +AATGCGGATTCCAGTCC AATGCGGATTCCAGTCA,AATGCGGATTCCTCCCC 3 1,1 AATGTTTGCCCTTCAG 6 AATGTTTGGGGAACCT AATGTTTGAGGAACCT 5 2 +ACAGATTACCGAACGTA 4 +ACAGATTACCTTACTCC 3 ACAGGTGTCAAATCAGA 9 +ACAGGTGTCCCCAAGCA 3 +ACCATTTGAGTTGTCAT ACCCTTGGTGTTGTCAT 4 1 ACCATTTGATCCCAATC ACCCTTGGTTCCCAATC 7 1 ACCCACGAGGGTTGGT ACCCACGAAGGTTGGT 9 1 +ACCCACGATAACTACT 4 +ACCCGACTTGCAAGGAC ACCCGACTTACAAGGAC,ACCCGACTTGCAAGGGA 9 1,1 ACCCGACTTGTTGTCAT ACCCGACTTATTGTCAT 6 1 -ACCCGACTTTGCAAGGG 9 ACCCTAACCATGGATTA ACCGCAACTATGGATTA 10 1 ACCCTTGGTAAGCGTAC 5 +ACCCTTGGTATGTTGGC 4 +ACCGCAACTAGAGGTGG 4 ACCGTGTTTTGGTTTCT ACCGTGTTTTAGTGTTT,ACCGTGTTTTGAGGTCT 10 1,1 ACCTTCTTACTTCAAT 6 ACCTTCTTCTTTCTTT TCATCCTTCTTTCTTT 10 1 -ACCTTCTTTTGGATCG ACCTTCTTTTGGCTTG,CCCATCTGTTGGATCG 12 1,3 +ACCTTCTTTTGGATCG ACCTTCTTTTGGCTTG 12 1 ACGCTCTCAACTAGCCA ACGCTCTCAGTTAACCA 6 1 ACGTGCTAGTCGGTTTA 17 +ACTACTTGTTTCCGAGT 4 ACTAGATTGACTAGGAT 11 ACTAGATTGTCGGTACG ACTAGATTGTCGGTAGC 14 1 +ACTGCGTTGAAGCTTCT 3 ACTTAGGTAAGTGATGC 7 ACTTCAATCTTTAATC ACTTCAATCTTTTTTT 5 1 ACTTCAATTGGGATTC 14 +AGAACGATTTGTAAAGG 3 AGAGACTAACTGCCGT 8 +AGAGACTACCGCTGTT 3 AGAGACTATGAAATGA AGAGACTATGTAATGA 11 1 AGAGACTATGACGGAC 10 AGAGGTGGCCCATCTG 9 AGAGGTGGCGACGTCA 5 AGATGTATTAAGCTTCT AGATGTATTACGCTTCT 7 1 AGATGTATTCCACATTA 8 +AGATGTATTCGATTGAT AGATGTATTCCCTTGGT 3 1 AGATGTATTTGTACACG 10 AGCAACCTGCGGCTTAC 8 +AGCACCACCATCTCCC 4 AGCACCTCTTACCAGGC 6 +AGCCAAGATGTTGTCAT AGCCAAGATGTTGTATC 4 1 AGCCAAGATTGAGGTCT 13 AGCCTCTTTAATTCCCA 10 +AGCTTTCCATACAGCCG 4 AGGCAACGTCATATGG 13 AGGCAACGTTTAACAG 9 AGGCCTAAGGGTGGGAT 5 AGGGAACGACTTCGCAC 9 AGGGAACGATTGCATAT 9 AGGGAAGGTGGCGTTAG CGGGAAGGTGGCGTTAG 9 1 +AGGGAAGGTTACTTGTG 3 AGGGCCAATTCTCACTT AGGGCCAATTCACGTTT 5 1 AGGGTGTATCAGTTTGC 8 +AGGGTGTATCTTGGTGT 4 AGGGTGTATGACAGATA AGGGCCAATGACAGATA,CGGGTGTATGACAGATA 5 1,1 +AGGTGACACCGAGATGT 4 AGGTTAGTGAAGGTAAT 10 -AGGTTAGTGTCGAAGCT AGGTTAGTGACCACGCT,CGGTTAGTGTCGAAGCT 6 3,1 +AGGTTAGTGTCGAAGCT 
CGGTTAGTGTCGAAGCT 6 1 +AGTATGAGTAGAGGTGG 4 AGTATGAGTCCCATCTG AGTATGAGTCCCGACTT 5 1 AGTATGAGTTGCCTCAC AGTATGAGTAGCCTCAC 7 1 AGTCAAAGACCTGACAC AAGCAAAGTCCTGACAC 7 1 AGTCAATACACTGCCGT AGCCAAGATACTGCCGT,CGTCAATACACTGCCGT 5 1,1 +AGTCAATACTTGCATAT 4 +AGTGGATGGGAGAGTAT 3 AGTGTCGGACGGGCTTT 6 AGTTTACGTACTAGGAT 5 +AGTTTACGTCCCTAACC 4 +AGTTTACGTTGACGGAC AGTTTACGTTGACCCTC 3 1 +AGTTTACGTTTAAACTG 4 ATAGTCGCAATCAAGTG 6 ATAGTGGACCGAGATGT ATAGTGGACGGAGATGT 6 1 +ATATGCATGTACCTTG 4 ATCACGTTTAATGACCC ATCACGTTTACTGACCC,ATCACGTTTCCTGACAC 10 1,1 ATCACGTTTCCACATTA 8 ATCACGTTTCGAATAAT 5 @@ -88,29 +127,40 @@ ATCCGCTATGTAAAGG 6 ATCGGTTCGAACCTGAC 11 ATCGGTTCGCAAGTCAT 6 ATCGGTTCGGATTAGAC ATCGGTTCGGATTAACA,ATCGGTTCGTTTTAGAC 10 1,1 -ATCTTTGTGTCCATGT ATCTTTGTATCCATGT,ATCTTTGTCTTCAGGT 6 1,3 +ATCTGCATCAGCAGAAC GTCTGCATCAGCAGAAC 4 1 +ATCTTTGTGTCCATGT 6 ATCTTTGTTAACCATC ATCTTTGTTGACCAGT 7 1 ATGCTCCGTGTTACGAT 9 +ATGCTCCGTTTATCTGT 4 ATGGCCTGTAATGTTTG ATGCTCCGTAATGTTTG 9 1 ATGGCCTGTAGCACCAC 7 +ATGGCCTGTGTTGTCAT 4 +ATGGCCTGTTGACGGAC 3 ATGTGTCCTATGGAAAT ATGTGTCCTGTGGAAAT 13 1 ATGTGTCCTGCTTTGGC ATATGTCCTGCTTTGGC 6 1 ATGTGTCCTTAACCATC 6 ATTCCAGACTTGTGACT ATTCCAGACTTGAGGGT 7 1 +ATTGATTCTATCAAGTG ATTGATTCTGTCAAGTG 3 1 +ATTGATTCTGTTAACCA ATTGATTCTATTAACCA,TTTGATTCTGTTAACCA 4 1,1 ATTGATTCTTGTAAAGG ATTGATTCTTGTAAAGA 6 1 +ATTGGATCGATGTCGGA 4 ATTGTGACTTCCGTCCA 6 ATTTCCGGCGAATAAT ATTTCCGGCAATTAGT,CTTTCCGGCGAATAAT 8 2,1 ATTTCCGGTACTTGTG 9 CAGTTTGCGGCGTTAG CAGTTTACGGCGTTAG 7 1 CATCGCAGCGACTCCT 5 CATCGCAGCTTCAGGT 6 +CATCGCAGTAGCCTCG 4 CATCGCAGTTATAGCC 14 CCAACCGTCCACATTA 7 CCAACCGTCGGGCTTT 9 CCAACCGTGTTCAACT 12 CCAACCGTTAGTCTAG 7 +CCCATCTGTCTGTGGT CCCATCTGTCCGTGGT,CCCATCTGTCTGTGAT 4 1,1 CCGCTGTTATCAGCGC 8 +CCGCTGTTTGCAAGGG CCGCTGTTCGCAAGGG,CCGCTGTTTGCAAAGG,CCGCTGTTTGCCATCG 4 1,1,1 CGAACGTAATACTCTT CAAACGTAATACTCTT 7 1 +CGAACGTACATTTGTT 4 CGAACGTAGGCGTTAG CGAACGTAGGAGTTAG 9 1 CGAACGTATCCCAATC CGAACGTATGCGTATC 10 1 CGCTAATAGCATGGGT 5 @@ -119,91 +169,128 @@ CGGCTTACAGAGGTGG 6 CGTATTTCTTGCATAT CGTAATTCTTGCATAT 9 1 
CTATAGAGCGAACGTA 5 CTATAGAGTGCCATCG CTATAGAGTGCTATTT 6 1 +CTATAGAGTTTGTGTC CTATAGAGTTTGTGTA 4 1 +CTTACGGGCCCATCTG 4 +CTTACGGGCGGCACAT 4 CTTACGGGTAGCTTAT 10 CTTCAGGTGGATAAAG 9 CTTCGATTCTCGCGTA 6 +CTTCGATTGAAGTGCC 4 CTTCGATTGCGTTGCT CTTCGATTGAGTTTCG,CTTCGATTTCGTTGCT 7 1,1 CTTCGATTTAAATAGG CTTCGATTTAAATAGT 11 1 CTTTAATCCGCTCTCA 6 CTTTAATCGGGAGGTA CTTTAATCAGGAGGTA,CTTTAATCTGGAGGTA 7 2,1 CTTTCTTTATTCCTTG 5 CTTTCTTTGGAGAAGC CTTTCTTTAGAGAAGC 5 2 -GAAAGATTGTGCTTACCT GAAAGATTGTACTTACCT 8 3 +GAAAGATTGTGCTTACCT 8 GAAAGCGTACGTTAACCA GAAAGCGTACATTAACCA 8 1 -GAACACTAAGATGGAAAT GAACACTAAGTTGGACTT 8 3 +GAACACTAAGATGGAAAT 8 +GAACACTAAGCCCAAGCA 3 GAACACTAAGTGGTATGA 7 GAACGCCATTATTTCCGG 8 GAACGCCATTCATCGCAG 11 GAACGCCATTTCTCAACC 13 +GAACTAGGATTGCAAGGG 4 GAACTGCCGTGTCAATAC GAACTGCCGTTTCAATAC 11 1 GAAGGAAGACCAGATTAC 6 GAAGGCAGTTATATGCAT 9 GAAGGCAGTTCAGTTTGC 10 GAAGTTTAGAAGGCAACG 11 +GAATCAAGTGGGATAAAG 4 GAATTCCTTGATTGGGCC 6 GAATTCCTTGTGACCCTC GAATTCCTTGAGACCCTC 7 1 +GACAATTAGTCTATAGAG GACAATTAGTCTATAAAG 3 1 GACACGGACTACCTTGCC 7 -GACACGGACTCACGGACT GACACGGACTCGCGGACT,GATACAAACTCACGGACT 9 1,3 +GACACGGACTCACGGACT GACACGGACTCGCGGACT 9 1 +GACACGGACTCGGGCTTT 3 GACACGGACTTTAAGCGT 5 GACAGTCCCTTAACCATC 6 GACATTTGTTGAATACGC GACATTTGTTGAATACGA 13 1 GACATTTGTTTTTAACAG 7 +GACCACATTACAATTAGT 3 GACCACATTATGCAAGGG 9 GACCCGAATGAATGACCC 8 +GACCCGAATGGCATGGGT 3 GACCCGAATGTGACCAGT 7 +GACCCTTCAGATTCCTTG GACTCTTCAGATTCCTTG 4 1 GACCTACTAGCCTACTAG GACCTACTAGCCTACTAA 11 1 GACCTACTAGGGTGGGAT GACCTACTAGAGTGGGAT 11 1 GACCTACTAGTGCGTATC 6 +GACCTACTAGTTCACATA GACCTATTAGTTCACATC 4 1 +GACCTGACACTAACCATC 4 GACGAATAATAAGATTGT 5 +GACGACTCCTGTTAACCA 4 GACGACTCCTTGACCCTC TACGACTCCTTGACCCTC 5 1 GACGACTCCTTGGTTTCT 5 GACGAGATGTCACGGACT 5 +GACGAGATGTGAGGCTGA GACGAGATGTAAGGCTGA 4 1 GACGAGATGTGGATAAAG 6 GACGAGATGTGTTTGTTT GACGCGATGTGTTTGTTT 15 1 GACGAGATGTTCGGTTTA 8 GACGAGATGTTGCGTATC GATGAGAAGTTGCGTAAC 5 1 +GACGCTAGTCGGCATGCT GACGCTAGTCAGCATGCT,GACGCTAGTCGTCATATT 3 1,1 GACGCTAGTCTGAGGTCT 9 GACGTCAGCACGTACCTA 5 
+GACTCTTGACGCTTTGGC GACTCTTGACACTTTGGC 3 1 GACTGTCTGGCTCTTGAC GACTGTCTGGCTCTTGAA 8 1 GACTGTCTGGTACAGCCG 6 +GACTTACTCCATTACGAT 3 GACTTACTCCTCCGTCCA 9 +GACTTCTTCGAGGCAGTT GACTTCTTCGCCGCTGTT 4 1 GACTTCTTCGATCCCACG 6 GAGAAGGCTTACATCTAT 7 GAGAAGGCTTGAATACGC GAGAAGGCTTAAATACGC 7 1 +GAGAAGGCTTTCCAGTCC 3 GAGAATACGCCGGCACAT 5 GAGAATTCGTATGGGCAC 6 +GAGAATTCGTGAGAATTG 4 GAGACAGATAAGTTTAGA 6 +GAGACAGATATACAGCCG GAGACAGATATACAGCCA 4 1 +GAGACGATGGCCTGTTAT 4 GAGAGAATTGATATGCAT 7 GAGAGAATTGATGGAAAT 6 GAGAGAATTGGATCGTTT 5 GAGAGAATTGGCATGGGT 8 GAGAGAATTGTACTTGTG GAGAGAAATGTACCAGTG 5 1 GAGAGAGTATATTCGACG GAGAGAGTATATTCCTTG 6 1 +GAGAGAGTATGCGTTGCT 3 +GAGAGCCCATGTAACGTT 4 GAGAGCCCATTTTGTGTC 8 GAGATCTCGGCAATTAGT 5 +GAGATCTCGGGCATGGGT 3 GAGATCTCGGTGTAGTTT GAGATCTCGGTGTATTTG,GAGGTCTCGGTGTAGTTT 9 1,1 GAGATTGCGACTAGTAAC 5 GAGATTGCGAGTTACGAT GAGATCTCGGGTTACGAT,GAGATTGCGAATTACGAT 5 1,1 GAGCGTTGCTATCAAGTG 8 GAGCGTTGCTATTCCTTG 9 GAGGCGTTAGCTTCAGGT 5 +GAGGCGTTAGTTGATTCT 3 GAGGGAACCTTGCCATCG GAGGAAACCTTGCCATCG 6 1 +GAGGGAGGTACTAGTAAC 3 GAGGTCCCTTGAGTTTCG 8 +GAGGTCCCTTTACTTGTG 3 GAGTCCAAAGATGGATTA 5 GAGTCCAAAGCGAATAAT GAGTCCAAAGCGAATAAA,TAGTCCAAAGCGAATAAT 6 1,1 GAGTCCAAAGGGGAGGTA 5 GAGTCCAAAGTAGCCTCG GAGTCCAAAGTAGTCGCA 8 1 +GAGTTAACCAGATCTCGG GAGTTAACCAAATCTCGG 3 1 GAGTTTCGCAAGAGGG GAGTTTCGCGAGATGT 5 1 GAGTTTCGGTAATCCC 10 +GATAACCATCTCGAAGCT 3 GATAAGACGGCTTACTCC 7 GATAAGACGGTAACCCGT 7 GATACAAACTATGGAAAT GATACAAACTATGAAATG 5 1 GATACAAACTGTAACGTT GATACAAACTGTTCTGTT 7 1 GATAGTGTTTACACTAAG 5 +GATAGTGTTTTACCAGGC 4 +GATCCCAATCAATGTTTG 3 GATCCGTCCACGCTAATA GATCCGCCCACGCTAATA 5 1 GATCGGTACGCCAGCAGT GATCGGTACGCCAGCAGG 8 1 +GATCGGTACGTTGATCTA GATCGGTACGTTGATTAA 4 1 GATCGGTTTATTCGGCCT GATCGATTTATTCGGCCT 7 1 -GATCTCAACCTGGCCTGT GATCTCAACCTAGCCTCG,GATCTCAACCTGACCTGT 6 3,1 +GATCGTTTGAGCGGTA GATCGTTTAAGCGGTA 4 1 +GATCTCAACCACTGAGTT 3 +GATCTCAACCTGGCCTGT GATCTCAACCTGACCTGT 6 1 GATCTGTGGTGGGTTGGT 5 GATCTGTGGTTTTGTGTC 7 GATGACCCTCGGCCCTTA 6 @@ -211,20 +298,31 @@ GATGAGGTCTAGAAGTCC 6 GATGAGGTCTTGACCCTC 
GATGAGGTCTTTCCGCTC 7 1 GATGAGGTCTTTGTTTAC GATGAGGTCTTTGATTCT 7 1 GATGAGGTCTTTTAACAG 6 +GATGCCATCGCTTCAGGT 3 +GATGGAGGAGGATGTGAG GATGGAGGAGAATGTGAG 3 1 GATGGGATTCCGGCACAT GATGGGATTCCGTCACAT 5 1 GATGGGATTCTCGGTTTA 7 +GATGGTTTCTGCAAGGAC GATGGTTTCTACTAGGAT 4 1 +GATGGTTTCTTAGAAATG 4 GATGGTTTCTTTGAGGGT GATGGTTTCTTCGAAGCT,GATGGTTTCTTTGAGGTT 5 2,1 GATGTAAAGGCCCTTCAG 7 +GATGTAAAGGGGATAAAG 3 +GATGTACACGAAAGCCTA 4 GATGTACACGCATCTCCC 7 GATGTAGTTTCAGATTAC 12 +GATGTAGTTTCTGTCTGG 4 GATGTAGTTTCTTACTCC 8 +GATGTGAGAGGAAGAC 3 GATGTGAGCGAATAAT GATGTGAGCGAATATG 7 1 GATGTGAGGCAAAGCC GATGTGAGGCAAGGCC 11 1 +GATGTGAGTCGGTTTA GATGTGAGTCGATTTA 4 1 +GATGTTCCAGATTGGGCC 4 GATTAGACTAACTACT 10 GATTAGACTGGCCTGT GATTAGACAGGCCTGT 6 1 GATTCCGAGTATGGGCAC 5 GATTCCGAGTCTTTCTTT 11 GATTGAGGGTAACCTGAC 12 +GATTGATCTAAGTTTAGA 4 GATTGGCGTTGGTCCCTT GATTGGCGTTTGACCCTC 5 1 GATTGGCGTTTAGTCTAG 7 GATTTAACAGGATGTGAG 8 @@ -232,20 +330,28 @@ GATTTAACAGTAACTACT 13 GATTTAACAGTCATATGG 6 GCAGCATGACACTAAG GCAGCATGACACTCAG 6 1 GCAGCATGATGGGCAC CCAGCATGATGGGCAC 7 1 +GCAGCATGGATCTCGG GTTGCACGGATCTCGG 3 1 +GCATGGGTTAACCCGT GCATGGGTCCAACCGT 4 1 GCTTACCTCTACTTGT GCTTACCTCTTCTTCG 8 1 -GGAAACAGTCCCAATC GGAAACAGTACCAGGC,TGCGACAGTCCCAATC 6 3,1 +GCTTTGGCACTAGCCA 4 +GGAAACAGACTAGGAT 3 +GGAAACAGTCCCAATC TGCGACAGTCCCAATC 6 1 GGAGAAGCCCACATTA 5 GGATAAAGAGAGACTA 5 GGCATGCTTTCCGCTC 6 +GGGTTGGTGTTCAACT GGGTTGGTGTTCAATC 4 1 GTCTCTCTAACTTAGC GTCTCTCTAACGTAGC 7 1 +GTCTCTCTATATGCAT 3 GTCTCTCTATTTCCGG GTCTCTCTAATTCCCA,GTCTCTCTATTTCGGG 11 1,1 GTCTCTCTTGCGACAG 7 GTGTAACCTGCGACAG 5 +GTTACGATATGGAAAT 4 GTTCAACTCCGTGTTT GTTCAACTCACAGTTT,GTTCAACTCCGTTTTT 6 1,1 GTTCAACTTACAGCCG GCTCTAGTTACAGCCG,GTTCAACTTACCGGCA 5 1,1 GTTCAACTTGACCAGT 7 GTTCAACTTGATGCCC 15 GTTGCACGCGATTGAT 5 +GTTGCACGGTCCGTAC 3 GTTGCACGTGTAGTTT GTTGCACGTGTAGTTG 7 1 GTTTGTTTCTCCGCAT GTTTGTTTTTCCGCTC 9 1 GTTTGTTTGGAAACAG 5 @@ -256,7 +362,11 @@ TACAGCCGGTTGCACG TACAGCCGGCAGCATG,TACAGCCGGTTGCACA 11 1,1 TACAGCCGTCGGTACG 7 TACTTGTGTACAGCCG 8 TACTTGTGTGTTCCAG TACTTGTGTATTCCAG 10 1 
+TAGCCTCGCCGCTGTT TAGCCTCGCCGCTTTT 3 1 +TAGCCTCGGCAGTCGA 3 +TAGCCTCGTCGACGGT TAGCCTCGTCGAGTGC 4 1 TAGCTTATCGGCACAT 7 +TAGCTTATTGTAAAGG 3 TAGTCTAGCGACGTCA TAGTCTAGCGACGTCC 6 1 TAGTCTAGTTATAGCC 5 TAGTCTAGTTGATCTA AGGCCTAGTTGATCTA 7 1 @@ -267,14 +377,17 @@ TCATCCTTTTCCGCTC TCCTTATTTTCCGCTC,TTATCCTTTTCCGCTC 8 1,1 TCCTTATTCAGTCCCT 10 TCCTTATTTAGTCTAG 6 TCGAAGCTTGCCTCAC 8 -TCGAAGCTTTCCGAGT TCGAAGCTTTCGGCCT 6 3 +TCGAAGCTTTCCGAGT 6 TGAAAACTGGTAACTTAGC 8 TGAAAACTGGTCCCATCTG TGAAAACTGGTCCCATCTA 8 1 TGAAAACTGGTCTTTAATC 6 +TGAAAATGTCGGATTGCGA TGAAAATGTCGAATTGCGA 4 2 TGAAACAATCCATATGCAT 12 TGAAACACACGCTTTCTTT TGAAACACACGCCGTGTTT,TGAAACACACGCTTTAATC,TGAAACACACGCTTTCTTA 16 1,1,1 +TGAAACACACGTACCGGCA 3 TGAAACCCTTGAATGTTTG 6 TGAAACCCTTGCAATTAGT 9 +TGAAACCCTTGGGAGAAGC TGAAACCCTTGAGAGAAGC 4 1 TGAACATCTATGAGCCCAT TGAACATCTATAAGCCCAT,TGAACATCTATGAGCACAT 8 1,1 TGAACGCTCAGTGACCCTC TGAACGCTCAGTGACCTCA,TGAACGCTCAGTGACTCTC 12 1,1 TGAAGAAGTTAACTTCAAT 5 @@ -284,41 +397,55 @@ TGAAGGAGCTTTCGAAGCT 7 TGAAGGTTGTGGCAGTCGA 8 TGAAGGTTGTGGCTTTGGC TGAAGGTTGTGGCTTTTGC 5 1 TGAAGGTTGTGGGAAACAG TGAAGGTTGTGAGAAACAG 7 1 +TGAATACCCAGGGAATAGA 3 TGAATACCCAGTCGGTACG TGAATACCCAGTCGGTATG 11 1 TGAATGAAGCACATTTGTT TGAATGAAGCACATTTTTA 6 1 -TGAATGACTTTGAATACGC TGAATGACTTTAACTACTC 9 3 +TGAATGAAGCAGAAAGACC GGAATGAAGCAGAAAGACC 3 1 +TGAATGACTTTACCTTCTT TGAATGACTTTTCAATCTT 3 1 +TGAATGACTTTGAATACGC 9 TGAATGCATGGCATCTCCC 10 TGAATTCGACGGCGTTGCT 10 TGACAACAAATTAGTGTTT TGAAAACAAATTAGTGTTT 7 1 TGACAAGTCATTTAAGCGT 7 TGACAATACTTATTCCTTG TGACAATACTTATTTCCGG 16 1 TGACACAACAGGAGCGGTA 7 +TGACACAACAGGCCTCTTT 4 TGACACAACAGTGATGCCC 6 TGACACAGTTTATATCTTC TGACACAGTTTCTTTCTTT 9 1 TGACACAGTTTTGTACACG 9 TGACACTTTCTAGCACCAC 5 TGACACTTTCTGACGATGG 9 +TGACACTTTCTTAACTACT 3 +TGACAGACCATCCTATTCA TGACAGACCATCCTACTAG 4 2 TGACAGACCATGGTCCCTT TGACAGACCATAGTCCCTT 10 1 +TGACATACATTGAGTTTCG TGACATACATTAAGTTTCG 3 1 +TGACATACATTGGACTTCC TGACAGACCATGGACTTCC,TGACATACATTAGACTTCC 4 1,1 TGACATACATTTACAAACT TGACATACATTAACAATCC 10 1 TGACCAGCAGTCCACATTA 
TGACCATTACTCCACATTA 6 1 TGACCAGTGTCAAAGA 7 TGACCATTACTCTTACTCC 11 TGACCATTACTGTCCAAAG TGACCATTACTTTCCAAAG 11 1 +TGACCATTACTTGTAAAGG TGACCAGTACTTGTGT 3 1 TGACCTCGTCTAGGAAGAC 7 TGACCTCGTCTCGAGATGT 9 TGACCTCGTCTTGCTATTT TGACCTCATCTTGCTATTT,TGACCTCGTCTTCCTATTT,TGACCTCGTCTTGCTATTC 6 1,1,1 TGACCTCGTCTTTCACATA 11 TGACGTAGAAGCAACAAAT TGACGTAGAAGCGAATAAT 9 1 TGACGTAGAAGTTATAGCC TGACGTAGAAGTTATACCA 9 1 +TGACGTATCGGGAAAGACC TGACGTATCGGGAAACTCC 3 1 TGACTACCGTTTTTGGGAG 6 TGACTACGAGCCCTACTAG 7 TGACTCAGAATCATCTCCC 10 TGACTGTTCTTACTTCAAT TGACTGTCCTTACTTCAAT 8 1 +TGACTTGGTGTAGAAGTCC 3 +TGACTTGGTGTTTAAGCGT 4 +TGAGAAGCACTCAGTCCCT TGAGAAGCATTCAGTCCCT 3 1 TGAGAAGCACTCCCATCTG TGAGAAGCACTCCCATGTG 11 1 TGAGAAGCACTGTTTGTTT 6 TGAGACAAAGGATTTCCGG TGACACAAGGCATTTCCGG 8 1 TGAGACAACTTCAGTTTGC TGAGACAACTTCGGCTTAC 8 2 TGAGAGTTCACTCGACACC 13 +TGAGATTTACCCTAGTAAC TGAGATTTACCGTAGTAAC 3 1 TGAGCAGGGTATTTGTGTC TGAGTAGGGTATTTGTGTC 13 1 TGAGCCGGATTATCCCACG 8 TGAGCGCATTCCCTATTCA TGAGCGCATTCGCTATTCA 6 1 @@ -326,54 +453,74 @@ TGAGCGCATTCCGTCAGCA 8 TGAGCGCATTCTCGGTACG 11 TGAGGAATAGAGGCATGCT 6 TGAGGAATAGATAGCTTAT TGAGGAATAGAGAGCCCAT 7 1 +TGAGGGAAATCGAGCCCAT 4 TGAGGTTTCTCAGCACCAC TGAGGTTTCTCAGCACCAT 10 1 TGAGGTTTCTCGATTGCGA TGAGGTTTCTCAATTGCGA 5 2 +TGAGTAATCCCTAGCCTCG TGAGTAATTCCTAGCCGCG 4 1 TGAGTACACTCGAATACGC 6 +TGAGTCCATGTAAACGCCT TGAGTCCATGTAAAAGACC 3 1 TGAGTCCATGTCAAGAGGG 11 +TGAGTCCATGTCGGCTTAC 4 TGAGTCCATGTGGCGGTTT TGAGTCCATGTGGTGGTTT 5 1 -TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC,TGAGTCCATGTTGGCTACC 11 1,1 -TGAGTCCGTACGTTGTCAT TGAGTCCGTACATTGTCAT 5 3 +TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC 11 1 +TGAGTCCGTACGTTGTCAT 5 TGAGTCGTCGTGAGCCCAT TGAGTCGTCGTCAGTCCCT 5 1 TGAGTGGAGCTATGACCGA TGAGTGGAGCTATCAACGC 9 1 TGAGTGGAGCTCTTCTTCG TGAGTGGAGCTTTTGTTTG 7 1 +TGAGTGGAGCTGTTTGTTT 4 TGAGTGGAGCTTCCCAATC 12 TGAGTGGAGCTTGCAAGGG 11 TGAGTGGAGCTTTTAACAG 5 TGAGTTCTGTTTCGACACC TGAGTTCTGTTTCGACACT 5 1 TGATAACCCGTATTCAGTA 11 +TGATAACCCGTGTTAACCA 4 TGATAACCCGTTTCGGCCT 14 +TGATACGTGCTAATTCCCA 3 TGATACGTGCTAGTGAAAG 
TGATACGTGCTATGGAAAT 7 1 TGATACGTGCTCGAGATGT TGATACGTGCTCGATGTCT 5 1 TGATACGTGCTGTTCAACT 10 +TGATACGTGCTTTTACCCT 3 +TGATAGAAATGCCCGAATG 4 TGATAGAAATGGGTGGGAT TGATAGAAATGGGTGGAAT 6 1 TGATAGAAATGTGACCCTC TGATAGAAATGAGACCCTC,TGATAGAAATGTGACCCTT 10 2,1 TGATATTGCCTAAATCAGA 14 +TGATATTGCCTGGTGGGAT 3 TGATCACCGAGCGCTAATA 7 TGATCACCGAGTAGTCTAG 5 TGATCCCGTAGCCGCTGTT 10 TGATCCCGTAGTTGCATAT TGATCCCGTCGTTGCATAT 13 1 +TGATCGACGGTCCCAAGCA 3 +TGATCGTCATGTACGTGCT 3 TGATCGTCATGTGCGACAG 6 +TGATGCCCATATGCAT 3 TGATGTCTTTCGGTGGGAT TGATGTCTTTCACTAGGAT 7 1 TGATGTTTGAGACACTAAG TGATGTTTGAGGCACTAAG 9 1 +TGATTCGCTGGAAATCAGA 4 TGATTCGCTGGCGAATAAT TGATTCGCTGGCGAATATG 7 1 TGATTCGCTGGGCATGGGT TGATTCGCTGGACATGGGT 5 1 TGATTCGCTGGTTAAACTG 11 +TGATTGCACGCTAAGACGG 3 TGATTGCACGCTGTGCTCA 8 TGATTGTCGCCCGGCACAT TGATCGACACCCGGCACAT 6 1 TGATTTACCCTAGAGACTA TGATTTACCCTAGAAACTA 12 1 TGATTTACCCTGAATACGC 7 +TGCAAGGGATGGATTA 4 +TGCAAGGGGAATACGC 4 TGCCTCACCTTCTGGA 5 TGCGACAGCTTTCTTT 5 TGCGACAGGGCGTTAG 6 -TGGCTACCTAGCTTAT TGGCTACCGAGCCCAT,TGGCTACCTGGTTTCT 9 4,2 +TGGCTACCTAGCTTAT TGGCTACCTGGTTTCT 9 2 TGGCTACCTGCCATCG 7 TGGTATGAAGAGACTA 8 TTAAGCGTGACAAAGG TTAAGCGTTACAAACT 8 1 TTAAGCGTGAGCCCAT 6 TTATCTGTATGGGCAC 8 TTATCTGTCGAATAAT 10 +TTATCTGTTGACGGAC 4 TTATCTGTTTATCTGT 7 -TTGCATATAGGCAACG TTGGACTTAGGCAACG 5 3 +TTATCTGTTTGAGGGT 4 +TTGCATATAGGCAACG 5 TTGCATATCGAATAAT 10 TTGGACTTGAGTTTCG 15 TTTGTGTCCAGATTAC 8 +TTTGTGTCTGCAAGGG TTTGTGCCTGCAAGGG 4 1 diff --git a/tests/indrop_whitelist_user.tsv b/tests/indrop_whitelist_user.tsv index 0013184d..934b7d05 100644 --- a/tests/indrop_whitelist_user.tsv +++ b/tests/indrop_whitelist_user.tsv @@ -11,6 +11,7 @@ AAAGTAATCAGAACGGG 7 AAAGTAATCCGGCACAT AAAGTAATCAGGCACAT 3 1 AAATCGGGTCACGGACT 7 AAATCGGGTGGCGTTAG 11 +AAATGAATGGAAAGACC 2 AAATGAATGTGTAGTTT AAATGAATGTGTGGTTT 9 1 AAATGACCCGAGCGGTA AAATGACCCAAGCGGTA 4 1 AAATGACCCTAACCATC 3 @@ -18,10 +19,12 @@ AACCCATATTAACCATC 15 AACGAAACGATGGAAAT 3 AACGAAACGGAGCCCAT 3 AACGAAACGGTTCAACT 15 +AACGGTAGCGCCGGATT 2 AACTCACCGACGTATAC 
AACTCACCGGCGTATAC 6 1 AACTCACCGAGGAAGAC 9 AACTCACCGCCTATTCA 9 AACTCACCGTGACCCTC 4 +AACTGAGTTGATCGTTT 2 AACTGAGTTTGCCTCAC 7 AACTTAGCGTCCAAAG 4 AAGACACCACTAGATTG 7 @@ -32,9 +35,12 @@ AAGATGGCTTGGCTACC AAGATGGCTAGGCTACC 11 1 AAGCGAAGTATTCAGTA 10 AAGCGAAGTCTCCGCAT 8 AAGCGAAGTTCACCGAG 5 +AAGCGAAGTTCACGTTT 2 +AAGCGCCTTCAAGAGGG 2 AAGCGCCTTGCTTTGGC AAGCGCCTTACTTTGGC 4 1 AAGCGCCTTGGTGGGAT 3 AAGCGCCTTTGTTTGAG 6 +AAGCGCCTTTTCCGCTC 2 AAGCTACGGTCATATGG 10 AAGCTTCTCTTTCTTT 5 AAGCTTCTGCTTACCT 7 @@ -42,14 +48,21 @@ AAGTGAAAGATATGCAT 5 AATACTCTTAATTCCCA 13 AATACTCTTACACTAAG 10 AATACTCTTCACAAGGC AATACTCTTGACAAGGC 5 1 +AATACTCTTCAGTTTGC 2 +AATATACCTACCTTCTT 2 AATATACCTAGGCAGTT 8 +AATATCTTCAATGTTTG 2 AATATCTTCATGACTTT 3 AATATCTTCGGGAAATC AATATCTTCAGGAAATC 3 1 AATCCCACGATCAGCGC 11 AATCCCACGCACGGACT 9 +AATCCCACGGAAAGACC 2 +AATGCGGATACCTTCTT 2 AATGCGGATTCCAGTCC AATGCGGATTCCAGTCA 3 1 +AATGTTTGAATGTTTG 2 +AATGTTTGAGGAACCT 2 AATGTTTGCCCTTCAG 6 -AATGTTTGGGGAACCT AATGTTTGAGGAACCT 5 2 +AATGTTTGGGGAACCT 5 ACAGATTACCGAACGTA 4 ACAGATTACCTTACTCC 3 ACAGGTGTCAAATCAGA 9 @@ -66,6 +79,7 @@ ACCCGACTTTGCAAGGG 9 ACCCTAACCATGGATTA 10 ACCCTTGGTAAGCGTAC 5 ACCCTTGGTATGTTGGC 4 +ACCCTTGGTGATTAGAC 2 ACCGCAACTAGAGGTGG 4 ACCGTGTTTCTGTTCTT 5 ACCGTGTTTTGGTTTCT 10 @@ -73,15 +87,20 @@ ACCTTCTTACTTCAAT 6 ACCTTCTTCTTTCTTT 10 ACCTTCTTTGCGTATC 11 ACCTTCTTTTGGATCG 12 +ACCTTGCCTTCGGCCT 2 ACGCTCTCAACTAGCCA 6 +ACGGAATTTGCGTTGCT 2 +ACGTGCTAGGCAGCATG 2 ACGTGCTAGTCGGTTTA 17 ACTACTTGTTTCCGAGT 4 ACTAGATTGACTAGGAT 11 ACTAGATTGTCGGTACG 14 ACTGCGTTGAAGCTTCT 3 +ACTGCGTTGCCCAAGCA 2 ACTTAGGTAAGTGATGC 7 ACTTCAATCTTTAATC 5 ACTTCAATTGGGATTC 14 +AGAACGATTCTCAGAAT 2 AGAACGATTTGTAAAGG 3 AGAGACTAACTGCCGT 8 AGAGACTACCGCTGTT 3 @@ -89,25 +108,34 @@ AGAGACTATGAAATGA AGAGACTATGTAATGA 11 1 AGAGACTATGACGGAC 10 AGAGGTGGCCCATCTG 9 AGAGGTGGCGACGTCA 5 +AGAGGTGGGTTGTCAT AGAGGTGGGTTGTCCT 2 1 AGATGTATTAAGCTTCT AGATGTATTACGCTTCT 7 1 AGATGTATTCCACATTA 8 AGATGTATTCGATTGAT 3 AGATGTATTTGTACACG 10 +AGCAACCTGACTTCAAT 2 AGCAACCTGCGGCTTAC 8 AGCACCACCATCTCCC 4 
AGCACCTCTTACCAGGC 6 +AGCAGAACCGAATAAT 2 +AGCAGAACTTGGCGTT 2 AGCCAAGATGTTGTCAT 4 AGCCAAGATTGAGGTCT 13 AGCCTCTTTAATTCCCA AGCCTCTTTAATTCCCC,AGCCTCTTTGATTCCCA 10 1,1 AGCCTCTTTCAGTCCCT 8 AGCTTTCCATACAGCCG 4 +AGGCAACGACCTTCTT 2 +AGGCAACGGTCTCTCT 2 AGGCAACGTCATATGG 13 AGGCAACGTTTAACAG 9 AGGCCTAAGGGTGGGAT 5 AGGGAACGACTTCGCAC 9 +AGGGAACGAGCCGGATT 2 AGGGAACGATTGCATAT 9 +AGGGAAGGTAGTTTAGA 2 AGGGAAGGTGGCGTTAG CGGGAAGGTGGCGTTAG 9 1 AGGGAAGGTTACTTGTG 3 +AGGGAAGGTTTCCGCTC 2 AGGGCCAATTCTCACTT 5 AGGGTGTATCAGTTTGC 8 AGGGTGTATCTTGGTGT 4 @@ -157,11 +185,13 @@ ATGTGTCCTTAACCATC 6 ATTCCAGACTTGATCTA 6 ATTCCAGACTTGTGACT 7 ATTGATTCTATCAAGTG ATTGATTCTGTCAAGTG 3 1 +ATTGATTCTGAAGGCTT 2 ATTGATTCTGTTAACCA ATTGATTCTATTAACCA,TTTGATTCTGTTAACCA 4 1,1 ATTGATTCTTGTAAAGG ATTGATTCTTGTAAAGA 6 1 ATTGGATCGATGTCGGA 4 ATTGGATCGGTGTCGGA 3 ATTGTGACTTCCGTCCA 6 +ATTTCCGGCAATTAGT 2 ATTTCCGGCGAATAAT CTTTCCGGCGAATAAT 8 1 ATTTCCGGGTTACGAT ATTTCCGGATTACGAT 8 1 ATTTCCGGTACTTGTG 9 @@ -170,6 +200,7 @@ CATCGCAGCGACTCCT 5 CATCGCAGCTTCAGGT 6 CATCGCAGTAGCCTCG 4 CATCGCAGTTATAGCC 14 +CATCGCAGTTGAGGGT 2 CATCGCAGTTTATCAC 4 CCAACCGTCCACATTA 7 CCAACCGTCGGGCTTT 9 @@ -179,11 +210,13 @@ CCCATCTGTCTGTGGT CCCATCTGTCCGTGGT,CCCATCTGTCTGTGAT 4 1,1 CCCATCTGTTGGATCG 3 CCGCTGTTATCAGCGC 8 CCGCTGTTTGCAAGGG CCGCTGTTCGCAAGGG,CCGCTGTTTGCAAAGG 4 1,1 +CCGCTGTTTGTAGTTT 2 CGAACGTAATACTCTT CAAACGTAATACTCTT 7 1 CGAACGTACATTTGTT 4 CGAACGTAGGCGTTAG CGAACGTAGGAGTTAG 9 1 CGAACGTATCCCAATC 10 CGCTAATAGCATGGGT 5 +CGCTAATAGTGTAACC 2 CGGCACATTTTGTGTC 5 CGGCTTACAGAGGTGG 6 CGTATTTCTTGCATAT CGTAATTCTTGCATAT 9 1 @@ -192,19 +225,23 @@ CTATAGAGTGCCATCG 6 CTATAGAGTTTGTGTC CTATAGAGTTTGTGTA 4 1 CTTACGGGCCCATCTG 4 CTTACGGGCGGCACAT 4 +CTTACGGGGGAGAAGC 2 CTTACGGGTAGCTTAT 10 CTTCAGGTGGATAAAG 9 CTTCAGGTTAGCTTAT 9 +CTTCGATTCATCGCAG 2 CTTCGATTCTCGCGTA 6 CTTCGATTGAAGTGCC 4 CTTCGATTGCGTTGCT CTTCGATTTCGTTGCT 7 1 CTTCGATTTAAATAGG CTTCGATTTAAATAGT 11 1 CTTCGATTTGGGATTC 9 CTTCGATTTTGAGGGT 7 +CTTTAATCAGGAGGTA 2 CTTTAATCCGCTCTCA 6 -CTTTAATCGGGAGGTA CTTTAATCAGGAGGTA,CTTTAATCTGGAGGTA 
7 2,1 +CTTTAATCGGGAGGTA 7 +CTTTCTTTAGAGAAGC 2 CTTTCTTTATTCCTTG 5 -CTTTCTTTGGAGAAGC CTTTCTTTAGAGAAGC 5 2 +CTTTCTTTGGAGAAGC 5 GAAAGATTGTACTTACCT 3 GAAAGATTGTGCTTACCT 8 GAAAGCGTACGTTAACCA GAAAGCGTACATTAACCA 8 1 @@ -218,12 +255,15 @@ GAACGCCATTTCTCAACC 13 GAACTAGGATTGCAAGGG 4 GAACTGCCGTGTCAATAC GAACTGCCGTTTCAATAC 11 1 GAAGGAAGACCAGATTAC 6 +GAAGGCAGTTAGAAGTCC GAAGGCAGTTCGAAGTCC 2 1 GAAGGCAGTTATATGCAT 9 GAAGGCAGTTCAGTTTGC 10 GAAGTTTAGAAGGCAACG 11 GAATCAAGTGGGATAAAG 4 +GAATGGAAATCGAATAAT GAATGGCAATCGAATAAT 2 1 GAATTCCTTGATTGGGCC 6 GAATTCCTTGTGACCCTC GAATTCCTTGAGACCCTC 7 1 +GACAATTAGTAACTTAGC 2 GACAATTAGTCTATAGAG GACAATTAGTCTATAAAG 3 1 GACACGGACTACCTTGCC 7 GACACGGACTCACGGACT GACACGGACTCGCGGACT 9 1 @@ -246,6 +286,7 @@ GACCTACTAGTGCGTATC 6 GACCTACTAGTTCACATA 4 GACCTGACACTAACCATC 4 GACGAATAATAAGATTGT 5 +GACGAATAATCCGTGTTT GACGAATAATCCGTATTT 2 1 GACGACTCCTGGGAACCT 3 GACGACTCCTGTTAACCA 4 GACGACTCCTTGACCCTC TACGACTCCTTGACCCTC 5 1 @@ -256,6 +297,7 @@ GACGAGATGTGGATAAAG 6 GACGAGATGTGTTTGTTT GACGCGATGTGTTTGTTT 15 1 GACGAGATGTTCGGTTTA 8 GACGAGATGTTGCGTATC 5 +GACGCTAGTCGCATGGGT 2 GACGCTAGTCGGCATGCT GACGCTAGTCAGCATGCT 3 1 GACGCTAGTCTGAGGTCT 9 GACGTCAGCACGTACCTA 5 @@ -277,8 +319,11 @@ GAGAATTCGTGAGAATTG 4 GAGACAGATAAGTTTAGA 6 GAGACAGATATACAGCCG GAGACAGATATACAGCCA 4 1 GAGACGATGGCCTGTTAT 4 +GAGACGATGGGGGTGTAT 2 +GAGACGATGGTGGTTTCT 2 GAGAGAATTGATATGCAT 7 GAGAGAATTGATGGAAAT GAGAGAATTGATAGAAAT 6 1 +GAGAGAATTGGAATACGC 2 GAGAGAATTGGATCGTTT 5 GAGAGAATTGGCATGGGT 8 GAGAGAATTGTACTTGTG 5 @@ -288,27 +333,36 @@ GAGAGCCCATGTAACGTT 4 GAGAGCCCATTTTGTGTC 8 GAGATCTCGGCAATTAGT 5 GAGATCTCGGGCATGGGT 3 +GAGATCTCGGTACAAACT 2 GAGATCTCGGTGTAGTTT GAGGTCTCGGTGTAGTTT 9 1 GAGATTGCGACTAGTAAC 5 GAGATTGCGAGTTACGAT GAGATTGCGAATTACGAT 5 1 +GAGATTGCGATAAATAGG 2 GAGCGTTGCTATCAAGTG 8 GAGCGTTGCTATCCGCTA 3 GAGCGTTGCTATTCCTTG 9 +GAGGACTTCCGGTGGGAT 2 GAGGCGTTAGCTTCAGGT 5 GAGGCGTTAGTTGATTCT 3 +GAGGGAACCTAGAGACTA 2 GAGGGAACCTTGCCATCG GAGGAAACCTTGCCATCG 6 1 GAGGGAGGTACTAGTAAC 3 GAGGTCCCTTGAGTTTCG 8 GAGGTCCCTTTACTTGTG 3 
+GAGGTCCCTTTGGAGGAG 2 GAGTCCAAAGATGGATTA 5 GAGTCCAAAGCGAATAAT GAGTCCAAAGCGAATAAA,TAGTCCAAAGCGAATAAT 6 1,1 GAGTCCAAAGGGGAGGTA 5 GAGTCCAAAGTAGCCTCG 8 GAGTCCAAAGTGCCTCAC GAGTCCAAAGTGCCTGAC 6 1 +GAGTTAACCAACTGAGTT 2 GAGTTAACCAGATCTCGG GAGTTAACCAAATCTCGG 3 1 +GAGTTGTCATTACCGGCA 2 GAGTTTCGCAAGAGGG 5 GAGTTTCGGTAATCCC 10 +GATAACCATCGTTACGAT 2 GATAACCATCTCGAAGCT 3 +GATAAGACGGACTGAGTT 2 GATAAGACGGCTTACTCC 7 GATAAGACGGTAACCCGT 7 GATACAAACTATGGAAAT 5 @@ -319,6 +373,7 @@ GATAGTGTTTACACTAAG 5 GATAGTGTTTTACCAGGC 4 GATCCCAATCAATGTTTG 3 GATCCGTCCACGCTAATA GATCCGCCCACGCTAATA 5 1 +GATCCGTCCAGCAGCATG 2 GATCGGTACGCCAGCAGT GATCGGTACGCCAGCAGG 8 1 GATCGGTACGTTGATCTA 4 GATCGGTTTATTCGGCCT GATCGATTTATTCGGCCT 7 1 @@ -333,6 +388,7 @@ GATCTGTGGTTTTGTGTC 7 GATGACCCTCGGCCCTTA 6 GATGAGGTCTAGAAGTCC 6 GATGAGGTCTTGACCCTC 7 +GATGAGGTCTTGTGTCCT 2 GATGAGGTCTTTGTTTAC 7 GATGAGGTCTTTTAACAG 6 GATGCCATCGCTTCAGGT 3 @@ -341,17 +397,21 @@ GATGGGATTCCGGCACAT GATGGGATTCCGTCACAT 5 1 GATGGGATTCTCGGTTTA 7 GATGGTTTCTGCAAGGAC 4 GATGGTTTCTTAGAAATG 4 +GATGGTTTCTTCGAAGCT 2 GATGGTTTCTTTGAGGGT GATGGTTTCTTTGAGGTT 5 1 GATGTAAAGGCCCTTCAG 7 GATGTAAAGGGGATAAAG 3 +GATGTAAAGGGTTGCACG GATGTAAAGGGTTGCAAG 2 1 GATGTACACGAAAGCCTA 4 GATGTACACGCATCTCCC 7 GATGTAGTTTCAGATTAC 12 GATGTAGTTTCTGTCTGG 4 GATGTAGTTTCTTACTCC 8 +GATGTAGTTTGATGTATT 2 GATGTGAGAGGAAGAC 3 GATGTGAGCGAATAAT 7 GATGTGAGGCAAAGCC GATGTGAGGCAAGGCC 11 1 +GATGTGAGTAACCCGT 2 GATGTGAGTCGGTACG GATGTGAGTCGGTACT 3 1 GATGTGAGTCGGTTTA GATGTGAGTCGATTTA,GATGTGAGTCGGTTTG 4 1,1 GATGTTCCAGATTGGGCC 4 @@ -363,6 +423,7 @@ GATTCCGAGTCTTTCTTT 11 GATTGAGGGTAACCTGAC 12 GATTGAGGGTTAGCCTCG GATTGAGGGTTAGCTTCG 9 1 GATTGATCTAAGTTTAGA 4 +GATTGATCTATCATATGG 2 GATTGGCGTTGGTCCCTT 5 GATTGGCGTTTAGTCTAG 7 GATTTAACAGAACCTGAC GATTTAAAAGAACCTGAC 5 1 @@ -372,15 +433,20 @@ GATTTAACAGTCATATGG 6 GATTTAACAGTGCCTCAC GATTTAACAGTACCTCAC 5 1 GCAGCATGACACTAAG GCAGCATGACACTCAG 6 1 GCAGCATGATGGGCAC CCAGCATGATGGGCAC 7 1 +GCAGCATGCCCATAGC TCAGCATGCCCATAGC 2 1 GCAGCATGGATCTCGG 3 +GCATGGGTAAATCAGA 2 GCATGGGTTAACCCGT 4 
GCTTACCTCTACTTGT 8 GCTTTGGCACTAGCCA 4 +GCTTTGGCCCCAAGCA 2 GGAAACAGACTAGGAT 3 GGAAACAGTACCAGGC 3 GGAAACAGTCCCAATC 6 GGAGAAGCCCACATTA 5 GGATAAAGAGAGACTA 5 +GGATAAAGTTGGCGTT 2 +GGCATGCTCGCTAGTC 2 GGCATGCTTTCCGCTC 6 GGGTTGGTGTTCAACT 4 GTCTCTCTAACTTAGC GTCTCTCTAACGTAGC 7 1 @@ -392,6 +458,7 @@ GTCTCTCTTGCGACAG 7 GTCTCTCTTTGTGACT 8 GTCTCTCTTTTATCAC 10 GTGTAACCTGCGACAG 5 +GTTACGATATCAGCGC GTTACGATGTCAGCGC 2 1 GTTACGATATGGAAAT 4 GTTCAACTCCGTGTTT GTTCAACTCCGTTTTT 6 1 GTTCAACTTACAGCCG 5 @@ -419,12 +486,16 @@ TAGCTTATTGTAAAGG 3 TAGTCTAGCGACGTCA TAGTCTAGCGACGTCC 6 1 TAGTCTAGTTATAGCC 5 TAGTCTAGTTGATCTA 7 -TCATATGGTCTGCATC TCATATGGACTGCATC 5 2 +TCATATGGACTGCATC 2 +TCATATGGTCTGCATC 5 TCATCCTTCGTATTTC 10 TCATCCTTTCCAGTCC TCATCCTTTCTAGTCC 6 1 TCATCCTTTTCCGCTC TTATCCTTTTCCGCTC 8 1 +TCCTTATTAGCAGAAC 2 TCCTTATTCAGTCCCT 10 TCCTTATTTAGTCTAG 6 +TCGAAGCTGGGAACCT 2 +TCGAAGCTGGTGGGAT 2 TCGAAGCTGTTACGAT 4 TCGAAGCTTAACCATC TCGAAGCTAAACCATC 7 1 TCGAAGCTTGCCTCAC 8 @@ -434,7 +505,9 @@ TGAAAACTGGTAACTTAGC 8 TGAAAACTGGTACTTCAAT 3 TGAAAACTGGTCCCATCTG TGAAAACTGGTCCCATCTA 8 1 TGAAAACTGGTCTTTAATC 6 -TGAAAATGTCGGATTGCGA TGAAAATGTCGAATTGCGA 4 2 +TGAAAATGTCGAATTGCGA 2 +TGAAAATGTCGGATTGCGA 4 +TGAAAATGTCGGGTGGGAT 2 TGAAACAATCCATATGCAT 12 TGAAACACACGCTTTCTTT TGAAACACACGCTTTCTTA 16 1 TGAAACACACGTACCGGCA 3 @@ -442,10 +515,14 @@ TGAAACCCTTGAATGTTTG 6 TGAAACCCTTGCAATTAGT 9 TGAAACCCTTGGGAGAAGC TGAAACCCTTGAGAGAAGC 4 1 TGAACATCTATGAGCCCAT TGAACATCTATAAGCCCAT,TGAACATCTATGAGCACAT 8 1,1 +TGAACGCTCAGCGAACGTA 2 TGAACGCTCAGTGACCCTC TGAACGCTCAGTGACTCTC 12 1 TGAAGAAGTTAACTTCAAT 5 TGAAGAAGTTACACAAGGC TGAAGAAGTTATACAAGGC 5 1 TGAAGCGTAGGCCTACTAG 9 +TGAAGGAGCTTCCCATCTG 2 +TGAAGGAGCTTGAAAGACC 2 +TGAAGGAGCTTGCAGCATG 2 TGAAGGAGCTTTCGAAGCT 7 TGAAGGTTGTGAGGCAGTT 6 TGAAGGTTGTGGCAGTCGA 8 @@ -464,11 +541,15 @@ TGAATGCATGGCATCGCAG 5 TGAATGCATGGCATCTCCC 10 TGAATGCATGGCGAATAAT 3 TGAATTCGACGGCGTTGCT 10 +TGAATTCGACGTTTGGGAG 2 TGACAACAAATTAGTGTTT TGAAAACAAATTAGTGTTT 7 1 +TGACAACAAATTTATAGCC 2 TGACAAGTCATATTCCTTG TGACAAGTCATTTTCCTTG 6 1 
TGACAAGTCATCTTCTTCG TGGCAAGTCATCTTCTTCG 4 1 TGACAAGTCATTTAAGCGT 7 TGACAATACTTATTCCTTG 16 +TGACAATACTTGGTGGGAT 2 +TGACACAACAGCTTCAGGT 2 TGACACAACAGGAGCGGTA 7 TGACACAACAGGCCTCTTT 4 TGACACAACAGTGATGCCC 6 @@ -478,12 +559,15 @@ TGACACAGTTTTGTACACG 9 TGACACTTTCTAGCACCAC 5 TGACACTTTCTGACGATGG 9 TGACACTTTCTTAACTACT 3 +TGACAGACCATCCTACTAG 2 TGACAGACCATCCTATTCA 4 TGACAGACCATGGTCCCTT TGACAGACCATAGTCCCTT 10 1 TGACATACATTGAGTTTCG TGACATACATTAAGTTTCG 3 1 TGACATACATTGGACTTCC TGACATACATTAGACTTCC 4 1 TGACATACATTTACAAACT 10 TGACCAGCAGTCCACATTA 6 +TGACCAGCAGTCGTGGATA 2 +TGACCAGTACTGCCGT 2 TGACCAGTGTCAAAGA 7 TGACCAGTTACTTGTG TGAGCAGTTACTTGTG 3 1 TGACCATTACTCTTACTCC 11 @@ -493,6 +577,8 @@ TGACCTCGTCTAGGAAGAC 7 TGACCTCGTCTCGAGATGT 9 TGACCTCGTCTTGCTATTT TGACCTCATCTTGCTATTT,TGACCTCGTCTTCCTATTT,TGACCTCGTCTTGCTATTC 6 1,1,1 TGACCTCGTCTTTCACATA 11 +TGACCTGTTATTCTGCATC 2 +TGACGTAGAAGATTCAGTA 2 TGACGTAGAAGCAACAAAT 9 TGACGTAGAAGTTATAGCC 9 TGACGTATCGGGAAAGACC 3 @@ -502,42 +588,53 @@ TGACTCAGAATCATCTCCC 10 TGACTCAGAATTTATAGCC 6 TGACTGTTCTTACCTTGCC 4 TGACTGTTCTTACTTCAAT TGACTGTCCTTACTTCAAT 8 1 +TGACTTCGCACTGAGGTCT 2 TGACTTGGTGTAGAAGTCC 3 TGACTTGGTGTTTAAGCGT 4 +TGAGAAGCACTATCCGCTA 2 TGAGAAGCACTCAGTCCCT TGAGAAGCATTCAGTCCCT 3 1 TGAGAAGCACTCCCATCTG TGAGAAGCACTCCCATGTG 11 1 TGAGAAGCACTGTTTGTTT 6 TGAGACAAAGGATTTCCGG 8 TGAGACAACTTCAGTTTGC 8 +TGAGACAACTTCGGCTTAC 2 +TGAGACAACTTTGTAGTTT 2 TGAGAGTTCACTCGACACC 13 TGAGATTTACCCTAGTAAC TGAGATTTACCGTAGTAAC 3 1 TGAGCAGGGTATTTGTGTC TGAGTAGGGTATTTGTGTC 13 1 +TGAGCCACATCTGTAGTTT 2 +TGAGCCGGATTACTAGCCA 2 TGAGCCGGATTATCCCACG 8 TGAGCGCATTCCCTATTCA TGAGCGCATTCGCTATTCA 6 1 TGAGCGCATTCCGTCAGCA 8 TGAGCGCATTCTCGGTACG 11 TGAGGAATAGAGGCATGCT 6 TGAGGAATAGATAGCTTAT 7 +TGAGGCTACTACTTTAATC 2 TGAGGGAAATCGAGCCCAT 4 +TGAGGTTTCTCAATTGCGA 2 TGAGGTTTCTCAGAGACTA 4 TGAGGTTTCTCAGCACCAC TGAGGTTTCTCAGCACCAT 10 1 -TGAGGTTTCTCGATTGCGA TGAGGTTTCTCAATTGCGA 5 2 +TGAGGTTTCTCGATTGCGA 5 TGAGTAATCCCTAGCCTCG 4 TGAGTACACTCGAATACGC 6 TGAGTCCATGTAAACGCCT 3 TGAGTCCATGTCAAGAGGG 11 TGAGTCCATGTCGGCTTAC 4 
TGAGTCCATGTGGCGGTTT TGAGTCCATGTGGTGGTTT 5 1 +TGAGTCCATGTGTGTAACC 2 TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC 11 1 TGAGTCCGTACATTGTCAT 3 TGAGTCCGTACGTTGTCAT 5 TGAGTCGTCGTGAGCCCAT 5 +TGAGTCGTCGTGTTCTGTT 2 +TGAGTGGAGCTACATGGGT 2 TGAGTGGAGCTACCACGCT 4 TGAGTGGAGCTATGACCGA 9 TGAGTGGAGCTCCCAAGCA 7 TGAGTGGAGCTCTTCTTCG 7 TGAGTGGAGCTGATTAGAC 3 -TGAGTGGAGCTGCATGGGT TGAGTGGAGCTACATGGGT,TGAGTGGCGCTGCATGGGT 6 2,1 +TGAGTGGAGCTGCATGGGT TGAGTGGCGCTGCATGGGT 6 1 TGAGTGGAGCTGTTTGTTT 4 TGAGTGGAGCTTCCCAATC 12 TGAGTGGAGCTTGCAAGGG 11 @@ -552,9 +649,11 @@ TGATACGTGCTAGTGAAAG 7 TGATACGTGCTCGAGATGT 5 TGATACGTGCTGTTCAACT 10 TGATACGTGCTTTTACCCT 3 +TGATAGAAATGAGACCCTC 2 TGATAGAAATGCCCGAATG 4 TGATAGAAATGGGTGGGAT TGATAGAAATGGGTGGAAT 6 1 -TGATAGAAATGTGACCCTC TGATAGAAATGAGACCCTC,TGATAGAAATGTGACCCTT 10 2,1 +TGATAGAAATGTGACCCTC TGATAGAAATGTGACCCTT 10 1 +TGATAGAAATGTGAGGTCT 2 TGATATTGCCTAAATCAGA 14 TGATATTGCCTGGTGGGAT 3 TGATCACCGAGCGCTAATA 7 @@ -568,6 +667,7 @@ TGATCGTCATGTGCGACAG 6 TGATGCCCATATGCAT 3 TGATGTCTTTCGGTGGGAT 7 TGATGTTTGAGACACTAAG TGATGTTTGAGGCACTAAG 9 1 +TGATGTTTGAGAGAGACTA 2 TGATTCGCTGGAAATCAGA 4 TGATTCGCTGGCGAATAAT 7 TGATTCGCTGGGCATGGGT TGATTCGCTGGACATGGGT 5 1 @@ -578,16 +678,21 @@ TGATTGCACGCTGTGCTCA 8 TGATTGTCGCCCGGCACAT 6 TGATTTACCCTAGAGACTA TGATTTACCCTAGAAACTA 12 1 TGATTTACCCTGAATACGC 7 +TGATTTACCCTTGTAAAGG 2 TGCAAGGGATGGATTA 4 TGCAAGGGGAATACGC 4 TGCAAGGGTAAATAGG 3 +TGCAAGGGTGGAGGAG 2 +TGCCTCACAGCAGAAC 2 TGCCTCACCTTCTGGA 5 TGCGACAGCTTTCTTT 5 TGCGACAGGGCGTTAG 6 TGCGACAGGGCTACTA 4 +TGCTATTTATTTCCGG 2 TGGCTACCGAGCCCAT 4 TGGCTACCTAGCTTAT 9 TGGCTACCTGCCATCG 7 +TGGCTACCTGGTTTCT TGGCTACCAGGTTTCT 2 1 TGGTATGAAGAGACTA 8 TTAAGCGTGACAAAGG 8 TTAAGCGTGAGCCCAT 6 @@ -598,8 +703,10 @@ TTATCTGTTGACGGAC 4 TTATCTGTTTATCTGT 7 TTATCTGTTTGAGGGT 4 TTATCTGTTTGATCTA TTATCTGTATGATCTA 3 1 +TTCGGCCTGCTTTGGC 2 TTGCATATAGGCAACG 5 TTGCATATCGAATAAT 10 +TTGCATATGGGAACCT 2 TTGGACTTAATGTTTG 4 TTGGACTTAGGCAACG 3 TTGGACTTGAGTTTCG 15 diff --git a/tests/indrop_whitelist_user_density.tsv 
b/tests/indrop_whitelist_user_density.tsv new file mode 100644 index 00000000..0013184d --- /dev/null +++ b/tests/indrop_whitelist_user_density.tsv @@ -0,0 +1,607 @@ +AAAACCTCCCGACTCCT AAAACCTCCCGACACCT 3 1 +AAAACCTCCTGTACACG 3 +AAAACGCCTATGACTTT 5 +AAAACTCGACATCGCAG 5 +AAAAGTCGGTCCTTATT 6 +AAAATCGTTGCTCTAGT AAAATCGTTACTCTAGT 6 1 +AAAATCGTTGGTGGGAT AAAATCGTTAGTGGGAT 4 1 +AAAGGTAATACACTAAG 10 +AAAGGTAATAGAGACTA 8 +AAAGTAATCAGAACGGG 7 +AAAGTAATCCGGCACAT AAAGTAATCAGGCACAT 3 1 +AAATCGGGTCACGGACT 7 +AAATCGGGTGGCGTTAG 11 +AAATGAATGTGTAGTTT AAATGAATGTGTGGTTT 9 1 +AAATGACCCGAGCGGTA AAATGACCCAAGCGGTA 4 1 +AAATGACCCTAACCATC 3 +AACCCATATTAACCATC 15 +AACGAAACGATGGAAAT 3 +AACGAAACGGAGCCCAT 3 +AACGAAACGGTTCAACT 15 +AACTCACCGACGTATAC AACTCACCGGCGTATAC 6 1 +AACTCACCGAGGAAGAC 9 +AACTCACCGCCTATTCA 9 +AACTCACCGTGACCCTC 4 +AACTGAGTTTGCCTCAC 7 +AACTTAGCGTCCAAAG 4 +AAGACACCACTAGATTG 7 +AAGATGGCTATGGGCAC 11 +AAGATGGCTGATCTCGG 6 +AAGATGGCTTGATGCCC 7 +AAGATGGCTTGGCTACC AAGATGGCTAGGCTACC 11 1 +AAGCGAAGTATTCAGTA 10 +AAGCGAAGTCTCCGCAT 8 +AAGCGAAGTTCACCGAG 5 +AAGCGCCTTGCTTTGGC AAGCGCCTTACTTTGGC 4 1 +AAGCGCCTTGGTGGGAT 3 +AAGCGCCTTTGTTTGAG 6 +AAGCTACGGTCATATGG 10 +AAGCTTCTCTTTCTTT 5 +AAGCTTCTGCTTACCT 7 +AAGTGAAAGATATGCAT 5 +AATACTCTTAATTCCCA 13 +AATACTCTTACACTAAG 10 +AATACTCTTCACAAGGC AATACTCTTGACAAGGC 5 1 +AATATACCTAGGCAGTT 8 +AATATCTTCATGACTTT 3 +AATATCTTCGGGAAATC AATATCTTCAGGAAATC 3 1 +AATCCCACGATCAGCGC 11 +AATCCCACGCACGGACT 9 +AATGCGGATTCCAGTCC AATGCGGATTCCAGTCA 3 1 +AATGTTTGCCCTTCAG 6 +AATGTTTGGGGAACCT AATGTTTGAGGAACCT 5 2 +ACAGATTACCGAACGTA 4 +ACAGATTACCTTACTCC 3 +ACAGGTGTCAAATCAGA 9 +ACAGGTGTCCCCAAGCA 3 +ACCATTTGAGTTGTCAT 4 +ACCATTTGATCCCAATC 7 +ACCCACGAGGGTTGGT ACCCACGAAGGTTGGT 9 1 +ACCCACGATAACTACT 4 +ACCCACGATTGAGGGT 6 +ACCCGACTTGCAAGGAC ACCCGACTTACAAGGAC 9 1 +ACCCGACTTGTTGTCAT ACCCGACTTATTGTCAT 6 1 +ACCCGACTTTCATATGG ACCCGACGTTCATATGG 4 1 +ACCCGACTTTGCAAGGG 9 +ACCCTAACCATGGATTA 10 +ACCCTTGGTAAGCGTAC 5 +ACCCTTGGTATGTTGGC 4 +ACCGCAACTAGAGGTGG 4 +ACCGTGTTTCTGTTCTT 5 +ACCGTGTTTTGGTTTCT 10 
+ACCTTCTTACTTCAAT 6 +ACCTTCTTCTTTCTTT 10 +ACCTTCTTTGCGTATC 11 +ACCTTCTTTTGGATCG 12 +ACGCTCTCAACTAGCCA 6 +ACGTGCTAGTCGGTTTA 17 +ACTACTTGTTTCCGAGT 4 +ACTAGATTGACTAGGAT 11 +ACTAGATTGTCGGTACG 14 +ACTGCGTTGAAGCTTCT 3 +ACTTAGGTAAGTGATGC 7 +ACTTCAATCTTTAATC 5 +ACTTCAATTGGGATTC 14 +AGAACGATTTGTAAAGG 3 +AGAGACTAACTGCCGT 8 +AGAGACTACCGCTGTT 3 +AGAGACTATGAAATGA AGAGACTATGTAATGA 11 1 +AGAGACTATGACGGAC 10 +AGAGGTGGCCCATCTG 9 +AGAGGTGGCGACGTCA 5 +AGATGTATTAAGCTTCT AGATGTATTACGCTTCT 7 1 +AGATGTATTCCACATTA 8 +AGATGTATTCGATTGAT 3 +AGATGTATTTGTACACG 10 +AGCAACCTGCGGCTTAC 8 +AGCACCACCATCTCCC 4 +AGCACCTCTTACCAGGC 6 +AGCCAAGATGTTGTCAT 4 +AGCCAAGATTGAGGTCT 13 +AGCCTCTTTAATTCCCA AGCCTCTTTAATTCCCC,AGCCTCTTTGATTCCCA 10 1,1 +AGCCTCTTTCAGTCCCT 8 +AGCTTTCCATACAGCCG 4 +AGGCAACGTCATATGG 13 +AGGCAACGTTTAACAG 9 +AGGCCTAAGGGTGGGAT 5 +AGGGAACGACTTCGCAC 9 +AGGGAACGATTGCATAT 9 +AGGGAAGGTGGCGTTAG CGGGAAGGTGGCGTTAG 9 1 +AGGGAAGGTTACTTGTG 3 +AGGGCCAATTCTCACTT 5 +AGGGTGTATCAGTTTGC 8 +AGGGTGTATCTTGGTGT 4 +AGGGTGTATGACAGATA CGGGTGTATGACAGATA 5 1 +AGGTGACACCGAGATGT 4 +AGGTTAGTGAAGGTAAT 10 +AGGTTAGTGACCACGCT 3 +AGGTTAGTGTCGAAGCT CGGTTAGTGTCGAAGCT 6 1 +AGTATGAGTAGAGGTGG 4 +AGTATGAGTCCCATCTG 5 +AGTATGAGTTGCCTCAC AGTATGAGTAGCCTCAC 7 1 +AGTCAAAGACCTGACAC 7 +AGTCAATACACTGCCGT CGTCAATACACTGCCGT 5 1 +AGTCAATACTTGCATAT 4 +AGTGGATGGGAGAGTAT 3 +AGTGTCGGACGGGCTTT 6 +AGTTTACGTACTAGGAT 5 +AGTTTACGTCCCTAACC 4 +AGTTTACGTTGACGGAC 3 +AGTTTACGTTTAAACTG 4 +ATAGTCGCAATCAAGTG 6 +ATAGTGGACCGAGATGT ATAGTGGACGGAGATGT 6 1 +ATATGCATGTACCTTG 4 +ATCACGTTTAATGACCC ATCACGTTTACTGACCC 10 1 +ATCACGTTTCCACATTA 8 +ATCACGTTTCGAATAAT 5 +ATCACGTTTTCATATGG 7 +ATCCGCTAGGATAAAG 3 +ATCCGCTATGTAAAGG 6 +ATCGGTTCGAACCTGAC 11 +ATCGGTTCGCAAGTCAT 6 +ATCGGTTCGGATTAGAC 10 +ATCTGCATCAGCAGAAC GTCTGCATCAGCAGAAC 4 1 +ATCTTTGTCTTCAGGT ATCTTTGTCTTCAGGA,CTCTTTGTCTTCAGGT 3 1,1 +ATCTTTGTGTCCATGT ATCTTTGTATCCATGT 6 1 +ATCTTTGTTAACCATC 7 +ATGCTCCGTGTTACGAT 9 +ATGCTCCGTTTATCTGT 4 +ATGGCCTGTAATGTTTG 9 +ATGGCCTGTAGCACCAC 7 +ATGGCCTGTGTCCGTAC 3 +ATGGCCTGTGTTGTCAT 4 
+ATGGCCTGTTGACGGAC 3 +ATGTGTCCTATGGAAAT ATGTGTCCTGTGGAAAT 13 1 +ATGTGTCCTGCTTTGGC ATATGTCCTGCTTTGGC 6 1 +ATGTGTCCTTAACCATC 6 +ATTCCAGACTTGATCTA 6 +ATTCCAGACTTGTGACT 7 +ATTGATTCTATCAAGTG ATTGATTCTGTCAAGTG 3 1 +ATTGATTCTGTTAACCA ATTGATTCTATTAACCA,TTTGATTCTGTTAACCA 4 1,1 +ATTGATTCTTGTAAAGG ATTGATTCTTGTAAAGA 6 1 +ATTGGATCGATGTCGGA 4 +ATTGGATCGGTGTCGGA 3 +ATTGTGACTTCCGTCCA 6 +ATTTCCGGCGAATAAT CTTTCCGGCGAATAAT 8 1 +ATTTCCGGGTTACGAT ATTTCCGGATTACGAT 8 1 +ATTTCCGGTACTTGTG 9 +CAGTTTGCGGCGTTAG CAGTTTACGGCGTTAG 7 1 +CATCGCAGCGACTCCT 5 +CATCGCAGCTTCAGGT 6 +CATCGCAGTAGCCTCG 4 +CATCGCAGTTATAGCC 14 +CATCGCAGTTTATCAC 4 +CCAACCGTCCACATTA 7 +CCAACCGTCGGGCTTT 9 +CCAACCGTGTTCAACT 12 +CCAACCGTTAGTCTAG 7 +CCCATCTGTCTGTGGT CCCATCTGTCCGTGGT,CCCATCTGTCTGTGAT 4 1,1 +CCCATCTGTTGGATCG 3 +CCGCTGTTATCAGCGC 8 +CCGCTGTTTGCAAGGG CCGCTGTTCGCAAGGG,CCGCTGTTTGCAAAGG 4 1,1 +CGAACGTAATACTCTT CAAACGTAATACTCTT 7 1 +CGAACGTACATTTGTT 4 +CGAACGTAGGCGTTAG CGAACGTAGGAGTTAG 9 1 +CGAACGTATCCCAATC 10 +CGCTAATAGCATGGGT 5 +CGGCACATTTTGTGTC 5 +CGGCTTACAGAGGTGG 6 +CGTATTTCTTGCATAT CGTAATTCTTGCATAT 9 1 +CTATAGAGCGAACGTA 5 +CTATAGAGTGCCATCG 6 +CTATAGAGTTTGTGTC CTATAGAGTTTGTGTA 4 1 +CTTACGGGCCCATCTG 4 +CTTACGGGCGGCACAT 4 +CTTACGGGTAGCTTAT 10 +CTTCAGGTGGATAAAG 9 +CTTCAGGTTAGCTTAT 9 +CTTCGATTCTCGCGTA 6 +CTTCGATTGAAGTGCC 4 +CTTCGATTGCGTTGCT CTTCGATTTCGTTGCT 7 1 +CTTCGATTTAAATAGG CTTCGATTTAAATAGT 11 1 +CTTCGATTTGGGATTC 9 +CTTCGATTTTGAGGGT 7 +CTTTAATCCGCTCTCA 6 +CTTTAATCGGGAGGTA CTTTAATCAGGAGGTA,CTTTAATCTGGAGGTA 7 2,1 +CTTTCTTTATTCCTTG 5 +CTTTCTTTGGAGAAGC CTTTCTTTAGAGAAGC 5 2 +GAAAGATTGTACTTACCT 3 +GAAAGATTGTGCTTACCT 8 +GAAAGCGTACGTTAACCA GAAAGCGTACATTAACCA 8 1 +GAACACTAAGATGGAAAT 8 +GAACACTAAGCCCAAGCA 3 +GAACACTAAGTGGTATGA 7 +GAACACTAAGTTGGACTT 3 +GAACGCCATTATTTCCGG 8 +GAACGCCATTCATCGCAG 11 +GAACGCCATTTCTCAACC 13 +GAACTAGGATTGCAAGGG 4 +GAACTGCCGTGTCAATAC GAACTGCCGTTTCAATAC 11 1 +GAAGGAAGACCAGATTAC 6 +GAAGGCAGTTATATGCAT 9 +GAAGGCAGTTCAGTTTGC 10 +GAAGTTTAGAAGGCAACG 11 +GAATCAAGTGGGATAAAG 4 +GAATTCCTTGATTGGGCC 6 
+GAATTCCTTGTGACCCTC GAATTCCTTGAGACCCTC 7 1 +GACAATTAGTCTATAGAG GACAATTAGTCTATAAAG 3 1 +GACACGGACTACCTTGCC 7 +GACACGGACTCACGGACT GACACGGACTCGCGGACT 9 1 +GACACGGACTCCGCAACT 6 +GACACGGACTCGGGCTTT 3 +GACACGGACTTTAAGCGT 5 +GACAGTCCCTTAACCATC 6 +GACATTTGTTGAATACGC GACATTTGTTGAATACGA 13 1 +GACATTTGTTTTTAACAG 7 +GACCACATTACAATTAGT 3 +GACCACATTATGCAAGGG 9 +GACCCATAGCTGAGGTCT 3 +GACCCGAATGAATGACCC 8 +GACCCGAATGGCATGGGT 3 +GACCCGAATGTGACCAGT 7 +GACCCTTCAGATTCCTTG GACTCTTCAGATTCCTTG 4 1 +GACCTACTAGCCTACTAG GACCTACTAGCCTACTAA 11 1 +GACCTACTAGGGTGGGAT GACCTACTAGAGTGGGAT 11 1 +GACCTACTAGTGCGTATC 6 +GACCTACTAGTTCACATA 4 +GACCTGACACTAACCATC 4 +GACGAATAATAAGATTGT 5 +GACGACTCCTGGGAACCT 3 +GACGACTCCTGTTAACCA 4 +GACGACTCCTTGACCCTC TACGACTCCTTGACCCTC 5 1 +GACGACTCCTTGGTTTCT 5 +GACGAGATGTCACGGACT 5 +GACGAGATGTGAGGCTGA GACGAGATGTAAGGCTGA 4 1 +GACGAGATGTGGATAAAG 6 +GACGAGATGTGTTTGTTT GACGCGATGTGTTTGTTT 15 1 +GACGAGATGTTCGGTTTA 8 +GACGAGATGTTGCGTATC 5 +GACGCTAGTCGGCATGCT GACGCTAGTCAGCATGCT 3 1 +GACGCTAGTCTGAGGTCT 9 +GACGTCAGCACGTACCTA 5 +GACTCTTGACGCTTTGGC GACTCTTGACACTTTGGC 3 1 +GACTGTCTGGCTCTTGAC GACTGTCTGGCTCTTGAA 8 1 +GACTGTCTGGTACAGCCG 6 +GACTGTCTGGTGTACACG GACTGTCTGGCGTACACG 3 1 +GACTTACTCCATTACGAT 3 +GACTTACTCCTCCGTCCA 9 +GACTTCTTCGAGGCAGTT 4 +GACTTCTTCGATCCCACG 6 +GAGAAGGCTTACATCTAT 7 +GAGAAGGCTTGAATACGC GAGAAGGCTTAAATACGC 7 1 +GAGAAGGCTTTCCAGTCC 3 +GAGAAGGCTTTTAGTCCG 3 +GAGAATACGCCGGCACAT 5 +GAGAATTCGTATGGGCAC 6 +GAGAATTCGTGAGAATTG 4 +GAGACAGATAAGTTTAGA 6 +GAGACAGATATACAGCCG GAGACAGATATACAGCCA 4 1 +GAGACGATGGCCTGTTAT 4 +GAGAGAATTGATATGCAT 7 +GAGAGAATTGATGGAAAT GAGAGAATTGATAGAAAT 6 1 +GAGAGAATTGGATCGTTT 5 +GAGAGAATTGGCATGGGT 8 +GAGAGAATTGTACTTGTG 5 +GAGAGAGTATATTCGACG 6 +GAGAGAGTATGCGTTGCT 3 +GAGAGCCCATGTAACGTT 4 +GAGAGCCCATTTTGTGTC 8 +GAGATCTCGGCAATTAGT 5 +GAGATCTCGGGCATGGGT 3 +GAGATCTCGGTGTAGTTT GAGGTCTCGGTGTAGTTT 9 1 +GAGATTGCGACTAGTAAC 5 +GAGATTGCGAGTTACGAT GAGATTGCGAATTACGAT 5 1 +GAGCGTTGCTATCAAGTG 8 +GAGCGTTGCTATCCGCTA 3 +GAGCGTTGCTATTCCTTG 9 +GAGGCGTTAGCTTCAGGT 5 
+GAGGCGTTAGTTGATTCT 3 +GAGGGAACCTTGCCATCG GAGGAAACCTTGCCATCG 6 1 +GAGGGAGGTACTAGTAAC 3 +GAGGTCCCTTGAGTTTCG 8 +GAGGTCCCTTTACTTGTG 3 +GAGTCCAAAGATGGATTA 5 +GAGTCCAAAGCGAATAAT GAGTCCAAAGCGAATAAA,TAGTCCAAAGCGAATAAT 6 1,1 +GAGTCCAAAGGGGAGGTA 5 +GAGTCCAAAGTAGCCTCG 8 +GAGTCCAAAGTGCCTCAC GAGTCCAAAGTGCCTGAC 6 1 +GAGTTAACCAGATCTCGG GAGTTAACCAAATCTCGG 3 1 +GAGTTTCGCAAGAGGG 5 +GAGTTTCGGTAATCCC 10 +GATAACCATCTCGAAGCT 3 +GATAAGACGGCTTACTCC 7 +GATAAGACGGTAACCCGT 7 +GATACAAACTATGGAAAT 5 +GATACAAACTCACGGACT 3 +GATACAAACTGAACGATT 4 +GATACAAACTGTAACGTT 7 +GATAGTGTTTACACTAAG 5 +GATAGTGTTTTACCAGGC 4 +GATCCCAATCAATGTTTG 3 +GATCCGTCCACGCTAATA GATCCGCCCACGCTAATA 5 1 +GATCGGTACGCCAGCAGT GATCGGTACGCCAGCAGG 8 1 +GATCGGTACGTTGATCTA 4 +GATCGGTTTATTCGGCCT GATCGATTTATTCGGCCT 7 1 +GATCGTTTGAGCGGTA GATCGTTTAAGCGGTA 4 1 +GATCGTTTGCAGCATG 4 +GATCGTTTTCATATGG 4 +GATCTCAACCACTGAGTT 3 +GATCTCAACCTAGCCTCG 3 +GATCTCAACCTGGCCTGT GATCTCAACCTGACCTGT 6 1 +GATCTGTGGTGGGTTGGT 5 +GATCTGTGGTTTTGTGTC 7 +GATGACCCTCGGCCCTTA 6 +GATGAGGTCTAGAAGTCC 6 +GATGAGGTCTTGACCCTC 7 +GATGAGGTCTTTGTTTAC 7 +GATGAGGTCTTTTAACAG 6 +GATGCCATCGCTTCAGGT 3 +GATGGAGGAGGATGTGAG GATGGAGGAGAATGTGAG 3 1 +GATGGGATTCCGGCACAT GATGGGATTCCGTCACAT 5 1 +GATGGGATTCTCGGTTTA 7 +GATGGTTTCTGCAAGGAC 4 +GATGGTTTCTTAGAAATG 4 +GATGGTTTCTTTGAGGGT GATGGTTTCTTTGAGGTT 5 1 +GATGTAAAGGCCCTTCAG 7 +GATGTAAAGGGGATAAAG 3 +GATGTACACGAAAGCCTA 4 +GATGTACACGCATCTCCC 7 +GATGTAGTTTCAGATTAC 12 +GATGTAGTTTCTGTCTGG 4 +GATGTAGTTTCTTACTCC 8 +GATGTGAGAGGAAGAC 3 +GATGTGAGCGAATAAT 7 +GATGTGAGGCAAAGCC GATGTGAGGCAAGGCC 11 1 +GATGTGAGTCGGTACG GATGTGAGTCGGTACT 3 1 +GATGTGAGTCGGTTTA GATGTGAGTCGATTTA,GATGTGAGTCGGTTTG 4 1,1 +GATGTTCCAGATTGGGCC 4 +GATTAGACGTAACGTT 3 +GATTAGACTAACTACT 10 +GATTAGACTGGCCTGT GATTAGACAGGCCTGT 6 1 +GATTCCGAGTATGGGCAC 5 +GATTCCGAGTCTTTCTTT 11 +GATTGAGGGTAACCTGAC 12 +GATTGAGGGTTAGCCTCG GATTGAGGGTTAGCTTCG 9 1 +GATTGATCTAAGTTTAGA 4 +GATTGGCGTTGGTCCCTT 5 +GATTGGCGTTTAGTCTAG 7 +GATTTAACAGAACCTGAC GATTTAAAAGAACCTGAC 5 1 +GATTTAACAGGATGTGAG 8 +GATTTAACAGTAACTACT 13 
+GATTTAACAGTCATATGG 6 +GATTTAACAGTGCCTCAC GATTTAACAGTACCTCAC 5 1 +GCAGCATGACACTAAG GCAGCATGACACTCAG 6 1 +GCAGCATGATGGGCAC CCAGCATGATGGGCAC 7 1 +GCAGCATGGATCTCGG 3 +GCATGGGTTAACCCGT 4 +GCTTACCTCTACTTGT 8 +GCTTTGGCACTAGCCA 4 +GGAAACAGACTAGGAT 3 +GGAAACAGTACCAGGC 3 +GGAAACAGTCCCAATC 6 +GGAGAAGCCCACATTA 5 +GGATAAAGAGAGACTA 5 +GGCATGCTTTCCGCTC 6 +GGGTTGGTGTTCAACT 4 +GTCTCTCTAACTTAGC GTCTCTCTAACGTAGC 7 1 +GTCTCTCTATATGCAT 3 +GTCTCTCTATTTCCGG GTCTCTCTATTTCGGG 11 1 +GTCTCTCTCGAATAAT 8 +GTCTCTCTGGTCTGAC 5 +GTCTCTCTTGCGACAG 7 +GTCTCTCTTTGTGACT 8 +GTCTCTCTTTTATCAC 10 +GTGTAACCTGCGACAG 5 +GTTACGATATGGAAAT 4 +GTTCAACTCCGTGTTT GTTCAACTCCGTTTTT 6 1 +GTTCAACTTACAGCCG 5 +GTTCAACTTGACCAGT 7 +GTTCAACTTGATGCCC 15 +GTTGCACGCGATTGAT 5 +GTTGCACGGTCCGTAC 3 +GTTGCACGTGTAGTTT GTTGCACGTGTAGTTG 7 1 +GTTTGTTTCTCCGCAT 9 +GTTTGTTTGGAAACAG 5 +TAACTACTAGGCAACG 12 +TAACTACTTTAAGCGT 5 +TACAGCCGACCTTGCC 10 +TACAGCCGCCCATCTG TACAGCCGCCCCTCTG 6 1 +TACAGCCGGTTGCACG TACAGCCGGTTGCACA 11 1 +TACAGCCGTCGGTACG 7 +TACTTGTGTACAGCCG 8 +TACTTGTGTGACGGAC 4 +TACTTGTGTGTTCCAG TACTTGTGTATTCCAG 10 1 +TAGCCTCGCCGCTGTT TAGCCTCGCCGCTTTT 3 1 +TAGCCTCGGCAGTCGA 3 +TAGCCTCGTCGACGGT 4 +TAGCTTATCGGCACAT 7 +TAGCTTATTGTAAAGG 3 +TAGTCTAGCGACGTCA TAGTCTAGCGACGTCC 6 1 +TAGTCTAGTTATAGCC 5 +TAGTCTAGTTGATCTA 7 +TCATATGGTCTGCATC TCATATGGACTGCATC 5 2 +TCATCCTTCGTATTTC 10 +TCATCCTTTCCAGTCC TCATCCTTTCTAGTCC 6 1 +TCATCCTTTTCCGCTC TTATCCTTTTCCGCTC 8 1 +TCCTTATTCAGTCCCT 10 +TCCTTATTTAGTCTAG 6 +TCGAAGCTGTTACGAT 4 +TCGAAGCTTAACCATC TCGAAGCTAAACCATC 7 1 +TCGAAGCTTGCCTCAC 8 +TCGAAGCTTTCCGAGT 6 +TCGAAGCTTTCGGCCT TCGAATCTTTCGGCCT 3 1 +TGAAAACTGGTAACTTAGC 8 +TGAAAACTGGTACTTCAAT 3 +TGAAAACTGGTCCCATCTG TGAAAACTGGTCCCATCTA 8 1 +TGAAAACTGGTCTTTAATC 6 +TGAAAATGTCGGATTGCGA TGAAAATGTCGAATTGCGA 4 2 +TGAAACAATCCATATGCAT 12 +TGAAACACACGCTTTCTTT TGAAACACACGCTTTCTTA 16 1 +TGAAACACACGTACCGGCA 3 +TGAAACCCTTGAATGTTTG 6 +TGAAACCCTTGCAATTAGT 9 +TGAAACCCTTGGGAGAAGC TGAAACCCTTGAGAGAAGC 4 1 +TGAACATCTATGAGCCCAT TGAACATCTATAAGCCCAT,TGAACATCTATGAGCACAT 8 1,1 
+TGAACGCTCAGTGACCCTC TGAACGCTCAGTGACTCTC 12 1 +TGAAGAAGTTAACTTCAAT 5 +TGAAGAAGTTACACAAGGC TGAAGAAGTTATACAAGGC 5 1 +TGAAGCGTAGGCCTACTAG 9 +TGAAGGAGCTTTCGAAGCT 7 +TGAAGGTTGTGAGGCAGTT 6 +TGAAGGTTGTGGCAGTCGA 8 +TGAAGGTTGTGGCTTTGGC TGAAGGTTGTGGCTTTTGC 5 1 +TGAAGGTTGTGGGAAACAG TGAAGGTTGTGAGAAACAG 7 1 +TGAATACCCAGGGAATAGA 3 +TGAATACCCAGTCGGTACG TGAATACCCAGTCGGTATG 11 1 +TGAATGAAGCACATTTGTT 6 +TGAATGAAGCAGAAAGACC GGAATGAAGCAGAAAGACC 3 1 +TGAATGACTTTAACTACTC TGAATGACTTTAACTACTT 3 1 +TGAATGACTTTACCTTCTT 3 +TGAATGACTTTGAATACGC 9 +TGAATGACTTTGTAACGTT 4 +TGAATGACTTTTAACTACT 9 +TGAATGCATGGCATCGCAG 5 +TGAATGCATGGCATCTCCC 10 +TGAATGCATGGCGAATAAT 3 +TGAATTCGACGGCGTTGCT 10 +TGACAACAAATTAGTGTTT TGAAAACAAATTAGTGTTT 7 1 +TGACAAGTCATATTCCTTG TGACAAGTCATTTTCCTTG 6 1 +TGACAAGTCATCTTCTTCG TGGCAAGTCATCTTCTTCG 4 1 +TGACAAGTCATTTAAGCGT 7 +TGACAATACTTATTCCTTG 16 +TGACACAACAGGAGCGGTA 7 +TGACACAACAGGCCTCTTT 4 +TGACACAACAGTGATGCCC 6 +TGACACAGTTTATATCTTC 9 +TGACACAGTTTTGGTATGA TGACACAGTTTGGGTATGA 3 1 +TGACACAGTTTTGTACACG 9 +TGACACTTTCTAGCACCAC 5 +TGACACTTTCTGACGATGG 9 +TGACACTTTCTTAACTACT 3 +TGACAGACCATCCTATTCA 4 +TGACAGACCATGGTCCCTT TGACAGACCATAGTCCCTT 10 1 +TGACATACATTGAGTTTCG TGACATACATTAAGTTTCG 3 1 +TGACATACATTGGACTTCC TGACATACATTAGACTTCC 4 1 +TGACATACATTTACAAACT 10 +TGACCAGCAGTCCACATTA 6 +TGACCAGTGTCAAAGA 7 +TGACCAGTTACTTGTG TGAGCAGTTACTTGTG 3 1 +TGACCATTACTCTTACTCC 11 +TGACCATTACTGTCCAAAG TGACCATTACTTTCCAAAG 11 1 +TGACCATTACTTGTAAAGG 3 +TGACCTCGTCTAGGAAGAC 7 +TGACCTCGTCTCGAGATGT 9 +TGACCTCGTCTTGCTATTT TGACCTCATCTTGCTATTT,TGACCTCGTCTTCCTATTT,TGACCTCGTCTTGCTATTC 6 1,1,1 +TGACCTCGTCTTTCACATA 11 +TGACGTAGAAGCAACAAAT 9 +TGACGTAGAAGTTATAGCC 9 +TGACGTATCGGGAAAGACC 3 +TGACTACCGTTTTTGGGAG 6 +TGACTACGAGCCCTACTAG 7 +TGACTCAGAATCATCTCCC 10 +TGACTCAGAATTTATAGCC 6 +TGACTGTTCTTACCTTGCC 4 +TGACTGTTCTTACTTCAAT TGACTGTCCTTACTTCAAT 8 1 +TGACTTGGTGTAGAAGTCC 3 +TGACTTGGTGTTTAAGCGT 4 +TGAGAAGCACTCAGTCCCT TGAGAAGCATTCAGTCCCT 3 1 +TGAGAAGCACTCCCATCTG TGAGAAGCACTCCCATGTG 11 1 +TGAGAAGCACTGTTTGTTT 6 
+TGAGACAAAGGATTTCCGG 8 +TGAGACAACTTCAGTTTGC 8 +TGAGAGTTCACTCGACACC 13 +TGAGATTTACCCTAGTAAC TGAGATTTACCGTAGTAAC 3 1 +TGAGCAGGGTATTTGTGTC TGAGTAGGGTATTTGTGTC 13 1 +TGAGCCGGATTATCCCACG 8 +TGAGCGCATTCCCTATTCA TGAGCGCATTCGCTATTCA 6 1 +TGAGCGCATTCCGTCAGCA 8 +TGAGCGCATTCTCGGTACG 11 +TGAGGAATAGAGGCATGCT 6 +TGAGGAATAGATAGCTTAT 7 +TGAGGGAAATCGAGCCCAT 4 +TGAGGTTTCTCAGAGACTA 4 +TGAGGTTTCTCAGCACCAC TGAGGTTTCTCAGCACCAT 10 1 +TGAGGTTTCTCGATTGCGA TGAGGTTTCTCAATTGCGA 5 2 +TGAGTAATCCCTAGCCTCG 4 +TGAGTACACTCGAATACGC 6 +TGAGTCCATGTAAACGCCT 3 +TGAGTCCATGTCAAGAGGG 11 +TGAGTCCATGTCGGCTTAC 4 +TGAGTCCATGTGGCGGTTT TGAGTCCATGTGGTGGTTT 5 1 +TGAGTCCATGTTCTCAACC TGAGTCCATGTTCTCAAAC 11 1 +TGAGTCCGTACATTGTCAT 3 +TGAGTCCGTACGTTGTCAT 5 +TGAGTCGTCGTGAGCCCAT 5 +TGAGTGGAGCTACCACGCT 4 +TGAGTGGAGCTATGACCGA 9 +TGAGTGGAGCTCCCAAGCA 7 +TGAGTGGAGCTCTTCTTCG 7 +TGAGTGGAGCTGATTAGAC 3 +TGAGTGGAGCTGCATGGGT TGAGTGGAGCTACATGGGT,TGAGTGGCGCTGCATGGGT 6 2,1 +TGAGTGGAGCTGTTTGTTT 4 +TGAGTGGAGCTTCCCAATC 12 +TGAGTGGAGCTTGCAAGGG 11 +TGAGTGGAGCTTTTAACAG 5 +TGAGTTCTGTTTCGACACC TGAGTTCTGTTTCGACACT 5 1 +TGATAACCCGTATTCAGTA 11 +TGATAACCCGTCCTATTCA 8 +TGATAACCCGTGTTAACCA 4 +TGATAACCCGTTTCGGCCT 14 +TGATACGTGCTAATTCCCA 3 +TGATACGTGCTAGTGAAAG 7 +TGATACGTGCTCGAGATGT 5 +TGATACGTGCTGTTCAACT 10 +TGATACGTGCTTTTACCCT 3 +TGATAGAAATGCCCGAATG 4 +TGATAGAAATGGGTGGGAT TGATAGAAATGGGTGGAAT 6 1 +TGATAGAAATGTGACCCTC TGATAGAAATGAGACCCTC,TGATAGAAATGTGACCCTT 10 2,1 +TGATATTGCCTAAATCAGA 14 +TGATATTGCCTGGTGGGAT 3 +TGATCACCGAGCGCTAATA 7 +TGATCACCGAGTAGTCTAG 5 +TGATCCCGTAGCCGCTGTT TGATCCCGTAGGCGCTGTT 10 1 +TGATCCCGTAGGCTCTAGT TGATCCCGTAGACTCTAGT 7 1 +TGATCCCGTAGTTGCATAT TGATCCCGTCGTTGCATAT 13 1 +TGATCGACGGTCCCAAGCA 3 +TGATCGTCATGTACGTGCT 3 +TGATCGTCATGTGCGACAG 6 +TGATGCCCATATGCAT 3 +TGATGTCTTTCGGTGGGAT 7 +TGATGTTTGAGACACTAAG TGATGTTTGAGGCACTAAG 9 1 +TGATTCGCTGGAAATCAGA 4 +TGATTCGCTGGCGAATAAT 7 +TGATTCGCTGGGCATGGGT TGATTCGCTGGACATGGGT 5 1 +TGATTCGCTGGTTAAACTG 11 +TGATTCGCTGGTTGTGACT 3 +TGATTGCACGCTAAGACGG 3 +TGATTGCACGCTGTGCTCA 8 +TGATTGTCGCCCGGCACAT 6 
+TGATTTACCCTAGAGACTA TGATTTACCCTAGAAACTA 12 1 +TGATTTACCCTGAATACGC 7 +TGCAAGGGATGGATTA 4 +TGCAAGGGGAATACGC 4 +TGCAAGGGTAAATAGG 3 +TGCCTCACCTTCTGGA 5 +TGCGACAGCTTTCTTT 5 +TGCGACAGGGCGTTAG 6 +TGCGACAGGGCTACTA 4 +TGGCTACCGAGCCCAT 4 +TGGCTACCTAGCTTAT 9 +TGGCTACCTGCCATCG 7 +TGGTATGAAGAGACTA 8 +TTAAGCGTGACAAAGG 8 +TTAAGCGTGAGCCCAT 6 +TTATCTGTATGGGCAC 8 +TTATCTGTCCCGAATG TTATCTGTCCCGAATA 6 1 +TTATCTGTCGAATAAT 10 +TTATCTGTTGACGGAC 4 +TTATCTGTTTATCTGT 7 +TTATCTGTTTGAGGGT 4 +TTATCTGTTTGATCTA TTATCTGTATGATCTA 3 1 +TTGCATATAGGCAACG 5 +TTGCATATCGAATAAT 10 +TTGGACTTAATGTTTG 4 +TTGGACTTAGGCAACG 3 +TTGGACTTGAGTTTCG 15 +TTTGTGTCCAGATTAC 8 +TTTGTGTCTGCAAGGG TTTGTGCCTGCAAGGG 4 1 diff --git a/tests/scrb_whitelist.tsv b/tests/scrb_whitelist.tsv index 44dd3ad4..fca13fec 100644 --- a/tests/scrb_whitelist.tsv +++ b/tests/scrb_whitelist.tsv @@ -1,6 +1,280 @@ -AAAAAA AAAACA,AAACAA,AAATAA,AACAAA,AATAAA,ACAAAA,AGAAAA,ATAAAA,CAAAAA,TAAAAA 146 1,2,1,1,1,1,1,1,11,5 -AGCGGG AACGGG,ACCGGG,AGAGGG,AGCAGG,AGCGAG,AGCGCG,AGCGGA,AGCGGC,AGCGGN,AGCGGT,AGCGTG,AGCTGG,AGTGGG,ATCGGG,CGCGGG,GGCGGG,NGCGGG,TGCGGG 550 3,1,1,4,2,2,3,7,2,4,5,1,1,2,9,5,2,4 -AGGCGG ACGCGG,AGGAGG,AGGCAG,AGGCCG,AGGCGA,AGGCGC,AGGCGT,AGGCTG,CGGCGG,GGGCGG,TGGCGG 141 1,2,1,2,4,2,5,1,2,2,1 -CGCGCA AGCGCA,CACGCA,CCCGCA,CGCACA,CGCCCA,CGCGAA,CGCGCC,CGCGCG,CGCGCT,CGCGTA,CGCTCA,CGTGCA,CTCGCA,TGCGCA 433 3,6,7,13,1,1,5,9,3,8,1,1,9,3 -GCCGTG ACCGTG,CCCGTG,GCAGTG,GCCATG,GCCGAG,GCCGGG,GCCGTA,GCCGTC,GCGGTG,GCTGTG,GGCGTG,GTCGTG,TCCGTG 268 5,3,1,3,3,5,4,2,1,5,1,1,3 -TAATCT CAATCT,GAATCT,TAATAT,TAATCA,TCATCT,TGATCT 124 1,1,1,1,1,1 +AAAAAA AGAAAA 146 1 +AAAATC 22 +AAACAT 21 +AAACTA AAACTN,GAACTA 27 1,1 +AAATAC 72 +AAATCA AGATCA,GAATCA 37 1,3 +AAATGT AAAGGT,CAATGT 41 1,1 +AAATTG CAATTG 36 1 +AACAAT 18 +AACATA ACCATA 24 1 +AAGATT AAGACT 26 1 +AAGTAT 33 +AAGTTA 19 +AATAAC GATAAC 23 1 +AATACA 70 +AATAGT CATAGT 18 1 +AATATG 15 +AATCAA 11 +AATCTT 16 +AATGAT GATGAT 18 2 +AATGTA 36 +AATTAG NATTAG 30 1 +AATTCT AATTCC,GATTCT 26 2,2 +ACAAAT 17 +ACATAA ACCTAA 13 1 
+ACCCCC ACCCCT,ACTCCC 42 1,2 +ACCCGG 17 +ACCGCG ACCGCT 25 1 +ACGCCG ACGCCA 14 1 +ACGCGC ACGCGT 14 1 +ACGGCC ATGGCC,TCGGCC 21 2,3 +ACGGGG 11 +ACTAAA 17 +ACTATT GCTATT 21 1 +ACTTAT NCTTAT 24 1 +ACTTTA GCTTTA 18 1 +AGAATT 20 +AGATAT 33 +AGCCGC 11 +AGCGGG AGAGGG,AGCAGG,AGCGGA,AGCGGN,AGCTGG,AGTGGG,NGCGGG 550 1,4,3,2,1,1,2 +AGGCGG AGGAGG,AGGCAG,AGGCGT 141 2,1,5 +AGGGAT AGGGAN 31 1 +AGGGCG AGGGTG,AGGTCG 17 2,1 +AGGGGC AGTGGC 33 1 +AGTAAT 14 +AGTATA 13 +AGTTAA AGCTAA 35 1 +ATAAAC 23 +ATAAGT ATGAGT 28 1 +ATACAA ATACGA 40 1 +ATAGAT 27 +ATAGTA ACAGTA,ATCGTA,CTAGTA 15 1,1,2 +ATATAG 14 +ATATTC ATATTN,CTATTC 17 1,2 +ATCAAA NTCAAA 25 1 +ATCATT 25 +ATCGAG TTCGAG 85 1 +ATGATA CTGATA,GTGATA 16 1,1 +ATGTAA 21 +ATTAAG ATGAAG 26 1 +ATTACT ATGACT,GTTACT 11 1,1 +ATTAGA CTTAGA 26 2 +ATTATC CTTATC,GTTATC 15 1,1 +ATTCAT 13 +ATTCCT 11 +ATTCTA 15 +ATTGTT CTTGTT 23 2 +ATTTAC 18 +ATTTGT ATCTGT,CTTTGT 30 1,6 +ATTTTG ATGTTG 40 1 +CAAAAA 11 +CAAAAT 25 +CAAATA CCAATA 14 1 +CAAGCT CCAGCT 19 3 +CAATAA 18 +CACCAA CTCCAA 23 3 +CACCCC CACACC,TACCCC 26 1,2 +CACCGG TACCGG 15 1 +CACGCG 13 +CACGGC CAAGGC,TACGGC 26 2,1 +CAGCGC CAGCAC,CAGCGT 12 1,1 +CAGGCC 14 +CAGGGG CAGGGA 45 1 +CAGTCA CTGTCA 63 2 +CATAAA CGTAAA 19 1 +CATCAG CATCAC,CGTCAG 28 1,2 +CATTAT CATTAN 16 1 +CATTTA CATTGA 21 1 +CCAACC CCAACA,CCAACT,CCGACC,TCAACC 18 3,1,1,3 +CCACAT CCACAA 11 1 +CCACCG CAACCG 15 1 +CCACGC 22 +CCAGGG 12 +CCCACG 26 +CCCAGC ACCAGC 22 2 +CCCCAC CCCTAC,CCTCAC 11 5,1 +CCCCCA 45 +CCCCGC 34 +CCCCGT NCCCGT 59 1 +CCCCTG 19 +CCCGAG CCCGAN,CCCGAT 43 1,1 +CCCGCG CCCGCC,CCCGCT 29 1,1 +CCCGGA 11 +CCCTGG 13 +CCGAGG 22 +CCGATT CCGACT 15 1 +CCGCAG 22 +CCGCGA 21 +CCGGAC CCAGAC,TCGGAC 20 1,1 +CCGGGT CCGGCT,TCGGGT 18 1,2 +CCGGTG TCGGTG 16 1 +CCGTCG CCGTCT 18 1 +CCGTGC TCGTGC 23 2 +CCTCGG 11 +CCTGCG CCTGCA,CTTGCG,TCTGCG 26 3,1,2 +CCTGGC ACTGGC 11 1 +CCTTCC ACTTCC 39 1 +CGACCC 11 +CGACGG CGACAG 15 1 +CGACTT 12 +CGCACA AGCACA,CGCACC 13 2,8 +CGCCCT NGCCCT 25 1 +CGCCGA TGCCGA 13 1 +CGCCTC 23 +CGCGAC AGCGAC,CGCTAC,TGCGAC 13 1,2,1 +CGCGCA 
AGCGCA,CTCGCA,TGCGCA 433 3,9,3 +CGCGGT CGCTGT,CGTGGT 15 1,3 +CGCGTG CGTGTG,TGCGTG 95 3,2 +CGCTCG 14 +CGGACG CTGACG 17 1 +CGGAGC CGAAGC,CGGAGA,NGGAGC 31 1,1,1 +CGGCCA AGGCCA,CTGCCA,TGGCCA 21 3,1,1 +CGGCTG CAGCTG,TGGCTG 29 1,1 +CGGGAG 14 +CGGGTC CGAGTC,TGGGTC 24 1,1 +CGGTCC AGGTCC 16 1 +CGGTGG 20 +CGTGCC 11 +CTAAAA CTAAAG 31 1 +CTAATT CTAATG 14 1 +CTATAT CTATCT,NTATAT 12 2,1 +CTCCCG CTCCAG 37 1 +CTCCCT CTACCT,CTCCAT 15 1,1 +CTCCGC 28 +CTCGGG 13 +CTCTCC CTCTCA 31 1 +CTGCGG 17 +CTGGGC 20 +CTTAAT CTGAAT 23 3 +CTTCTG CTTATG,CTTCTC 19 1,1 +CTTTAA CTTTCA 12 2 +GAAATT 14 +GAAGAC GAAGAN 20 1 +GAATAT 13 +GACCCG 30 +GACCGC 18 +GACGCC 15 +GACGGG GAAGGG,GACAGG 14 1,1 +GAGCCC AAGCCC,GAGCCA 20 1,2 +GAGCGG 26 +GAGGGC 14 +GATATA 16 +GATCGT GGTCGT 11 1 +GATTAA 13 +GCACCC TCACCC 24 1 +GCACGG TCACGG 16 4 +GCAGGC GTAGGC,TCAGGC 12 1,1 +GCCCAG 34 +GCCCGA 12 +GCCCTC GCCCTT 19 1 +GCCGAC 13 +GCCGCA GCAGCA,GCCACA,TCCGCA 13 1,2,3 +GCCGGT 17 +GCCGTG GCAGTG,GCCATG,GCTGTG 268 1,3,5 +GCCTCG 13 +GCCTGC ACCTGC 13 1 +GCGAGC GCGAGT 14 1 +GCGCAC GCGCAN 19 1 +GCGCTG GCGCTA 30 1 +GCGGAG GAGGAG,GTGGAG 30 1,1 +GCGGGA GCGGTA 12 1 +GCGTCC 11 +GCGTGG 12 +GCTAGA 13 +GCTCGC 14 +GCTTAC GGTTAC 23 1 +GGACAT GGGCAT 18 2 +GGACCA GGACCN 29 1 +GGACGC 15 +GGAGCC 20 +GGCAAT CGCAAT 15 1 +GGCACG AGCACG,NGCACG 17 1,1 +GGCAGC GGCGGC 16 1 +GGCCAC 14 +GGCGCT AGCGCT,GGCTCT 11 1,2 +GGGCGA GGGCAA 13 1 +GGGCTC AGGCTC 11 1 +GGGGAC GTGGAC 18 1 +GGGTGC 17 +GGTCCC 23 +GGTGCG GGTGAG,GGTGCA 13 1,1 +GTAAAT GTGAAT 23 1 +GTACAC CTACAC,NTACAC 40 1,1 +GTATAA 11 +GTCAAG GTCAGG 22 2 +GTCCCC 26 +GTCCCT GCCCCT,GTCCTT,GTCTCT,GTGCCT 11 9,1,1,1 +GTCGCG TTCGCG 16 1 +GTGCGC GTGCGT 60 5 +GTTAAA 12 +GTTCGA CTTCGA 15 1 +GTTTAT GTCTAT 14 2 +TAAAAC 30 +TAAACA TAAGCA 47 1 +TAAATG 12 +TAACAA 20 +TAACTT 38 +TAAGAT 26 +TAATAG 12 +TAATCT 124 +TAATGA TAAGGA,TCATGA 23 1,1 +TAATTC CAATTC 23 1 +TACATT CACATT 26 1 +TACTAT 13 +TACTTA GACTTA,TCCTTA 16 1,2 +TAGAAT 25 +TAGATA TAGGTA 31 1 +TAGTAA 13 +TAGTGG 14 +TATAAG TACAAG 26 1 +TATACT CATACT,TATGCT,TGTACT 34 1,1,2 
+TATAGA TACAGA 83 1 +TATATC CATATC 18 1 +TATCTA TCTCTA 13 1 +TATGAA 36 +TATTAC 20 +TATTGT TATGGT 31 1 +TATTTG 13 +TCAAAA 30 +TCAATT TCAGTT 16 1 +TCATAT 20 +TCCAAC ACCAAC,TCCACC,TCCATC,TCCTAC 18 2,1,1,1 +TCCCCG TGCCCG 43 3 +TCCCGC 102 +TCCGGG TCCAGG 30 2 +TCGAAG 19 +TCTAAT 13 +TCTTAA TCTCAA,TCTTGA 17 1,1 +TCTTTC TCCTTC,TCGTTC 17 6,1 +TGATTT TGCTTT 15 1 +TGGCGC TGGCCC,TGTCGC 14 1,1 +TGGGGG GGGGGG,TGGGGT,TGGGTG 37 1,1,1 +TGTTAT TGTCAT 11 1 +TGTTTA 15 +TTAACT TTAGCT 38 1 +TTAAGA 13 +TTAATC GTAATC 17 1 +TTACAT 12 +TTAGAA TTAGAN 18 1 +TTATAC 11 +TTATCA 13 +TTATGT TTCTGT 11 1 +TTATTG CTATTG,TCATTG,TTCTTG 16 3,2,1 +TTCAAT 14 +TTCATA GTCATA 11 3 +TTCCCT TTCCCA 28 1 +TTCCTC TTTCTC 36 1 +TTCTAA 17 +TTGAAA NTGAAA 15 1 +TTGATT 19 +TTGGAT CTGGAT,TCGGAT 15 1,2 +TTGTCC TTGCCC,TTGTGC 27 2,1 +TTGTTA 20 +TTTAAC 12 +TTTACA CTTACA,TCTACA 19 1,1 +TTTAGT 20 +TTTGGC ATTGGC,TTTTGC 17 1,1 +TTTGTA CTTGTA,TCTGTA 28 3,1 +TTTTAG TTGTAG 20 1 +TTTTCT CTTTCT 11 1 diff --git a/tests/tests.yaml b/tests/tests.yaml index 3b37ea24..b165de09 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -66,7 +66,7 @@ whitelist_scrb_seq: stdin: scrb_seq_fastq.1.gz outputs: [stdout] references: [scrb_whitelist.tsv] - options: whitelist --bc-pattern=CCCCCCNNNNNNNNNN -L test.log --stdin=scrb_seq_fastq.1.gz + options: whitelist --bc-pattern=CCCCCCNNNNNNNNNN -L test.log --plot-prefix=knee_plots whitelist_indrop: stdin: indrop.fastq.1.gz @@ -74,22 +74,23 @@ whitelist_indrop: references: [indrop_whitelist.tsv] options: whitelist --extract-method=regex --bc-pattern="(?P.{8,12})(?PGAGTGATTGCTTGTGACGCCTT)(?P.{8})(?P.{6})T{3}.*" -L test.log -whitelist_indrop_set_cell: +whitelist_indrop_density: stdin: indrop.fastq.1.gz outputs: [stdout] - references: [indrop_whitelist_user.tsv] - options: whitelist --extract-method=regex --bc-pattern="(?P.{8,12})(?PGAGTGATTGCTTGTGACGCCTT)(?P.{8})(?P.{6})T{3}.*" -L test.log --set-cell-number=607 + references: [indrop_whitelist_density.tsv] + options: whitelist --knee-method=density 
--extract-method=regex --bc-pattern="(?P.{8,12})(?PGAGTGATTGCTTGTGACGCCTT)(?P.{8})(?P.{6})T{3}.*" -L test.log --plot-prefix=knee_plots -whitelist_indrop_expect_cells: +whitelist_indrop_set_cell: stdin: indrop.fastq.1.gz outputs: [stdout] references: [indrop_whitelist_user.tsv] - options: whitelist --extract-method=regex --bc-pattern="(?P.{8,12})(?PGAGTGATTGCTTGTGACGCCTT)(?P.{8})(?P.{6})T{3}.*" -L test.log --expect-cells=6000 + options: whitelist --extract-method=regex --bc-pattern="(?P.{8,12})(?PGAGTGATTGCTTGTGACGCCTT)(?P.{8})(?P.{6})T{3}.*" -L test.log --set-cell-number=1000 -# TS: Note the 2 whitelist tests above all generate the same -# output. Ideally, would have input which generates different output -# for these tests. Also the input is so small that the plot output for -# this data is not very convincing. Consider expanding input? +whitelist_indrop_expect_cells_density: + stdin: indrop.fastq.1.gz + outputs: [stdout] + references: [indrop_whitelist_user_density.tsv] + options: whitelist --extract-method=regex --bc-pattern="(?P.{8,12})(?PGAGTGATTGCTTGTGACGCCTT)(?P.{8})(?P.{6})T{3}.*" -L test.log --knee-method=density --expect-cells=6000 whitelist_indrop_3_errors: stdin: indrop.fastq.1.gz diff --git a/umi_tools/whitelist.py b/umi_tools/whitelist.py index 60e03a05..5371bc81 100644 --- a/umi_tools/whitelist.py +++ b/umi_tools/whitelist.py @@ -50,35 +50,45 @@ Identifying the true cell barcodes ---------------------------------- -In the absence of the ``--set-cell-number`` option, ``whitelist`` uses -the distribution of read counts per CB or unique UMIs per CB -(``--method=[reads|umis]``) to identify the cut-off for 'true' UMIs -(the 'knee'). See this blog post for a more detailed explanation: +In the absence of the ``--set-cell-number`` option, ``whitelist`` +finds the knee in the curve for the cumulative read counts per CB or +unique UMIs per CB (``--method=[reads|umis]``). This point is referred +to as the 'knee'. 
Previously this point was identified using the +distribution of read counts per CB or unique UMIs per CB. The old +behaviour can be activated using ``--knee-method=density`` + +See this blog post for a more detailed exploration of the previous method: https://cgatoxford.wordpress.com/2017/05/18/estimating-the-number-of-true-cell-barcodes-in-single-cell-rna-seq/ Counts per cell barcode can be performed using either read or unique UMI counts. Use ``--method=[read|umis]`` to set the counting method. -The process of selecting the "best" local minima is not completely -foolproof. We recommend users always run whitelist with the -``--plot-prefix`` option to visualise the set of thresholds considered -for defining cell barcodes. This option will also generate a table -containing the thresholds which were rejected if you want to manually -adjust the threshold. In addition, if you expect that a local minima -will not be found, you can use the ``-allow-threshold-error`` option -to allow ``whitelist`` to proceed proceed past this stage. - -In addition, if you have some prior expectation on the maximum number -of cells which may have been sequenced, you can provide this using the -option ``--expect-cells`` (see below). - -If you don't mind if whitelist cannot identify a suitable threshold as -you intend to inspect the plots and identify the threshold manually, -provide the following options: ``--allow-threshold-error``, -``--plot-prefix=[PLOT_PREFIX]`` - -Finally, in some dataset there may be a risk that CBs above the +The process of selecting the "best" local minima with +``--knee-method=density`` is not completely foolproof. We recommend +users always run whitelist with the ``--plot-prefix`` option to +visualise the set of thresholds considered for defining cell +barcodes. This option will also generate a table containing the +thresholds which were rejected if you want to manually adjust the +threshold. 
In addition, if you expect that a local minimum will not be +found, you can use the ``--allow-threshold-error`` option to allow +``whitelist`` to proceed past this stage. In addition, if you +have some prior expectation on the maximum number of cells which may +have been sequenced, you can provide this using the option +``--expect-cells`` (see below). + +If you don't mind if ``whitelist --knee-method=density`` cannot +identify a suitable threshold as you intend to inspect the plots and +identify the threshold manually, provide the following options: +``--allow-threshold-error``, ``--plot-prefix=[PLOT_PREFIX]`` + +We expect that the default distance-based knee method should be more +robust than the density-based method. However, we haven't extensively +tested this method. If you have a dataset where you believe the +density-based method is better, please share this information with us: +https://github.com/CGATOxford/UMI-tools/issues + +Finally, in some datasets there may be a risk that CBs above the selected threshold are actually errors from another CB. We can detect potential instances of this by looking for CBs within one error (substition, insertion or deletion) of another CB with higher @@ -101,11 +111,24 @@ whitelist-specific options -------------------------- -""""""""""""""""" -``--plot-prefix`` -""""""""""""""""" - Use this option to indicate the prefix for the plots and table - describing the set of thresholds considered for defining cell barcodes +"""""""""""" +``--method`` +"""""""""""" + "reads" or "umis". Use either reads or unique UMI counts per cell + +"""""""""""" +``--knee-method`` +"""""""""""" + "distance" or "density". Two methods are available to detect + the 'knee' in the cell barcode count distributions. "distance" + identifies the maximum distance between the cumulative + distribution curve and a straight line between the first and + last points on the cumulative distribution curve.
"density" + transforms the counts per UMI into a gaussian density and then + finds the local minimum which separates "real" from "error" cell + barcodes. The gaussian method was the only method available + prior to UMI-tools v1.0.0. "distance" is now the default + method. """"""""""""""""""""" ``--set-cell-number`` @@ -120,10 +143,13 @@ """""""""""""""""" ``--expect-cells`` """""""""""""""""" - An upper limit estimate for the number of inputted cells. The knee - method will now select the first threshold (order ascendingly) - which results in the number of cell barcodes accepted being <= - EXPECTED_CELLS and > EXPECTED_CELLS * 0.1. + An upper limit estimate for the number of inputted cells. The + knee method will now select the first threshold (order + ascendingly) which results in the number of cell barcodes + accepted being <= EXPECTED_CELLS and > EXPECTED_CELLS * + 0.1. Note: This is not compatible with the default + ``--knee-method=distance`` since there is always a single + solution using this method. """"""""""""""""""""""""""" ``--allow-threshold-error`` @@ -138,10 +164,11 @@ barcodes. This value will also be used for error detection above the knee if required (``--ed-above-threshold``) -"""""""""""" -``--method`` -"""""""""""" - "reads" or "umis".
Use either reads or unique UMI counts per cell +""""""""""""""""" +``--plot-prefix`` +""""""""""""""""" + Use this option to indicate the prefix for the plots and table + describing the set of thresholds considered for defining cell barcodes """""""""""""""""""""""""""""""""""""""""" ``--ed-above-threshold=[discard|correct]`` @@ -246,6 +273,10 @@ def main(argv=None): dest="method", choices=["reads", "umis"], help=("Use reads or unique umi counts per cell")) + group.add_option("--knee-method", + dest="knee_method", + choices=["distance", "density"], + help=("Use distance or density methods for detection of knee")) group.add_option("--expect-cells", dest="expect_cells", type="int", @@ -269,6 +300,7 @@ def main(argv=None): parser.add_option_group(group) parser.set_defaults(method="reads", + knee_method="distance", extract_method="string", filter_cell_barcodes=False, whitelist_tsv=None, @@ -292,9 +324,16 @@ def main(argv=None): add_umi_grouping_options=False, add_sam_options=False) - if options.expect_cells and options.cell_number: - U.error("Cannot supply both --expect-cells and " - "--cell-number options") + if options.expect_cells: + if options.knee_method == "distance": + U.error("Cannot use --expect-cells with 'distance' knee " + "method. Switch to --knee-method=density if you want to " + "provide an expectation for the number of " + "cells. 
Alternatively, if you know the number of cell " + "barcodes, use --cell-number") + if options.cell_number: + U.error("Cannot supply both --expect-cells and " + "--cell-number options") extract_cell, extract_umi = U.validateExtractOptions(options) @@ -397,6 +436,7 @@ def main(argv=None): cell_whitelist, true_to_false_map = whitelist_methods.getCellWhitelist( cell_barcode_counts, + options.knee_method, options.expect_cells, options.cell_number, options.error_correct_threshold, diff --git a/umi_tools/whitelist_methods.py b/umi_tools/whitelist_methods.py index c9292343..1eea8ce6 100644 --- a/umi_tools/whitelist_methods.py +++ b/umi_tools/whitelist_methods.py @@ -23,11 +23,12 @@ from umi_tools._dedup_umi import edit_distance -def getKneeEstimate(cell_barcode_counts, - expect_cells=False, - cell_number=False, - plotfile_prefix=None): - ''' estimate the number of "true" cell barcodes +def getKneeEstimateDensity(cell_barcode_counts, + expect_cells=False, + cell_number=False, + plotfile_prefix=None): + ''' estimate the number of "true" cell barcodes using a gaussian + density-based method input: cell_barcode_counts = dict(key = barcode, value = count) @@ -242,6 +243,163 @@ def getKneeEstimate(cell_barcode_counts, return final_barcodes +def getKneeEstimateDistance(cell_barcode_counts, + cell_number=False, + plotfile_prefix=None): + ''' estimate the number of "true" cell barcodes via a knee method + which finds the point with maximum distance + + input: + cell_barcode_counts = dict(key = barcode, value = count) + cell_number (optional) = define number of cell barcodes to accept + plotfile_prefix = (optional) prefix for plots + + returns: + List of true barcodes + ''' + + def getKneeDistance(values): + ''' + This function is based on + https://stackoverflow.com/questions/2018178/finding-the-best-trade-off-point-on-a-curve + + and 
https://dataplatform.cloud.ibm.com/analytics/notebooks/54d79c2a-f155-40ec-93ec-ed05b58afa39/view?access_token=6d8ec910cf2a1b3901c721fcb94638563cd646fe14400fecbb76cea6aaae2fb1 + + The idea is to draw a line from the first to last point on the + cumulative counts curve and then find the point on the curve + which is the maximum distance away from this line + ''' + + # get coordinates of all the points + nPoints = len(values) + allCoord = np.vstack((range(nPoints), values)).T + + # get the first point + firstPoint = allCoord[0] + # get vector between first and last point - this is the line + lineVec = allCoord[-1] - allCoord[0] + lineVecNorm = lineVec / np.sqrt(np.sum(lineVec**2)) + + # find the distance from each point to the line: + # vector between all points and first point + vecFromFirst = allCoord - firstPoint + + # To calculate the distance to the line, we split vecFromFirst into two + # components, one that is parallel to the line and one that is perpendicular + # Then, we take the norm of the part that is perpendicular to the line and + # get the distance. + # We find the vector parallel to the line by projecting vecFromFirst onto + # the line. The perpendicular vector is vecFromFirst - vecFromFirstParallel + # We project vecFromFirst by taking the scalar product of the vector with + # the unit vector that points in the direction of the line (this gives us + # the length of the projection of vecFromFirst onto the line). 
If we + # multiply the scalar product by the unit vector, we have vecFromFirstParallel + scalarProduct = np.sum( + vecFromFirst * np.matlib.repmat(lineVecNorm, nPoints, 1), axis=1) + vecFromFirstParallel = np.outer(scalarProduct, lineVecNorm) + vecToLine = vecFromFirst - vecFromFirstParallel + + # distance to line is the norm of vecToLine + distToLine = np.sqrt(np.sum(vecToLine ** 2, axis=1)) + + # knee/elbow is the point with max distance value + idxOfBestPoint = np.argmax(distToLine) + + return(distToLine, idxOfBestPoint) + + counts = [x[1] for x in cell_barcode_counts.most_common()] + values = list(np.cumsum(counts)) + + # We need to perform the distance knee iteratively with reduced + # number of CBs since it's sensitive to the number of CBs input + # and overestimates if too many CBs are used + previous_idxOfBestPoint = 0 + distToLine, idxOfBestPoint = getKneeDistance(values) + if idxOfBestPoint == 0: + raise ValueError("Something's gone wrong here!!") + + max_iterations = 100 + iterations = 0 + while idxOfBestPoint - previous_idxOfBestPoint != 0: + previous_idxOfBestPoint = idxOfBestPoint + iterations += 1 + if iterations > max_iterations: + break + distToLine, idxOfBestPoint = getKneeDistance(values[:idxOfBestPoint*3]) + + knee_final_barcodes = [x[0] for x in cell_barcode_counts.most_common()[ + :idxOfBestPoint+1]] + + if cell_number: + threshold = counts[cell_number] + final_barcodes = set([ + x for x, y in cell_barcode_counts.items() if y > threshold]) + else: + final_barcodes = knee_final_barcodes + + if plotfile_prefix: + + # colour-blind friendly colours - https://gist.github.com/thriveth/8560036 + CB_color_cycle = ['#377eb8', '#ff7f00', '#4daf4a', + '#f781bf', '#a65628', '#984ea3', + '#999999', '#e41a1c', '#dede00'] + + user_line = mlines.Line2D( + [], [], color=CB_color_cycle[2], ls="dashed", + markersize=15, label='User-defined') + selected_line = mlines.Line2D( + [], [], color=CB_color_cycle[0], ls="dashed", markersize=15, label='Knee') + + # plot 
of the original curve and its corresponding distances + plt.figure(figsize=(12, 6)) + plt.plot(distToLine, label='Distance', color='r') + plt.plot(values, label='Cumulative', color='b') + plt.plot([idxOfBestPoint], values[idxOfBestPoint], marker='o', + markersize=8, color="red", label='Knee') + + if cell_number: + plt.axvline(x=cell_number, ls="dashed", + color=CB_color_cycle[2], label="User-defined") + + plt.legend() + plt.savefig("%s_cell_barcode_knee.png" % plotfile_prefix) + + colours_selected = [CB_color_cycle[0] for x in range(0, len(final_barcodes))] + colours_rejected = ["black" for x in range(0, len(counts)-len(final_barcodes))] + colours = colours_selected + colours_rejected + + fig = plt.figure() + fig3 = fig.add_subplot(111) + fig3.scatter(x=range(1, len(counts)+1), y=counts, + c=colours, s=10, linewidths=0) + fig3.loglog() + fig3.set_xlim(0, len(counts)*1.25) + fig3.set_xlabel('Barcode index') + fig3.set_ylabel('Count') + fig3.axvline(x=len(knee_final_barcodes), ls="dashed", color=CB_color_cycle[0]) + + if cell_number: + fig3.axvline(x=cell_number, ls="dashed", color=CB_color_cycle[2]) + + lgd = fig3.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., + handles=[selected_line, user_line], + title="User threshold") + else: + lgd = fig3.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., + handles=[selected_line], + title="Knee threshold") + + fig.savefig("%s_cell_barcode_counts.png" % plotfile_prefix, + bbox_extra_artists=(lgd,), bbox_inches='tight') + + if not cell_number: + with U.openFile("%s_cell_thresholds.tsv" % plotfile_prefix, "w") as outf: + outf.write("count\n") + outf.write("%s\n" % idxOfBestPoint) + + return(final_barcodes) + + def getErrorCorrectMapping(cell_barcodes, whitelist, threshold=1): ''' Find the mappings between true and false cell barcodes based on an edit distance threshold. 
@@ -275,13 +433,22 @@ def getErrorCorrectMapping(cell_barcodes, whitelist, threshold=1): def getCellWhitelist(cell_barcode_counts, + knee_method="distance", expect_cells=False, cell_number=False, error_correct_threshold=0, plotfile_prefix=None): - cell_whitelist = getKneeEstimate( - cell_barcode_counts, expect_cells, cell_number, plotfile_prefix) + if knee_method == "distance": + cell_whitelist = getKneeEstimateDistance( + cell_barcode_counts, cell_number, plotfile_prefix) + + elif knee_method == "density": + cell_whitelist = getKneeEstimateDensity( + cell_barcode_counts, expect_cells, cell_number, plotfile_prefix) + + else: + raise ValueError("knee_method must be 'distance' or 'density'") U.info("Finished - whitelist determination") From 246df6a0be2f95ed874b766d435abd73e2e798b3 Mon Sep 17 00:00:00 2001 From: TomSmithCGAT Date: Mon, 11 Feb 2019 14:06:04 +0000 Subject: [PATCH 2/5] debugging travis testing --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 91df2321..6512f6cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,9 +17,8 @@ before_install: - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew upgrade python; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install freetype; fi - pip install cython - - pip install pandas==0.19 + - pip install pandas - pip install scipy - - pip install matplotlib==2.2.3 # can remove once py2 support dropped install: - python setup.py install From c406eedfe85347149a4acb734d2cae65ebc86db7 Mon Sep 17 00:00:00 2001 From: TomSmithCGAT Date: Mon, 11 Feb 2019 14:58:18 +0000 Subject: [PATCH 3/5] debug numpy import --- umi_tools/whitelist_methods.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/umi_tools/whitelist_methods.py b/umi_tools/whitelist_methods.py index 1eea8ce6..c4ef2ae4 100644 --- a/umi_tools/whitelist_methods.py +++ b/umi_tools/whitelist_methods.py @@ -16,6 +16,8 @@ import matplotlib.lines as mlines import numpy as np +import 
numpy.matlib as npm + from scipy.stats import gaussian_kde from scipy.signal import argrelextrema @@ -294,8 +296,9 @@ def getKneeDistance(values): # the unit vector that points in the direction of the line (this gives us # the length of the projection of vecFromFirst onto the line). If we # multiply the scalar product by the unit vector, we have vecFromFirstParallel + scalarProduct = np.sum( - vecFromFirst * np.matlib.repmat(lineVecNorm, nPoints, 1), axis=1) + vecFromFirst * npm.repmat(lineVecNorm, nPoints, 1), axis=1) vecFromFirstParallel = np.outer(scalarProduct, lineVecNorm) vecToLine = vecFromFirst - vecFromFirstParallel From 27f2b289aa91fd224524209292269bc8446b5adf Mon Sep 17 00:00:00 2001 From: TomSmithCGAT Date: Tue, 12 Feb 2019 18:01:18 +0000 Subject: [PATCH 4/5] debug paired end dedup --- umi_tools/dedup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi_tools/dedup.py b/umi_tools/dedup.py index c4a6bed8..bb48fd1e 100644 --- a/umi_tools/dedup.py +++ b/umi_tools/dedup.py @@ -197,7 +197,7 @@ def main(argv=None): outfile = pysam.Samfile(out_name, out_mode, template=infile) if options.paired: - outfile = umi_methods.TwoPassPairWriter(infile, outfile) + outfile = sam_methods.TwoPassPairWriter(infile, outfile) nInput, nOutput, input_reads, output_reads = 0, 0, 0, 0 From 90956d5835baec2b8f8a06c08e6d757c6ec4f6f0 Mon Sep 17 00:00:00 2001 From: TomSmithCGAT Date: Thu, 14 Feb 2019 14:22:59 +0000 Subject: [PATCH 5/5] replaces deprecated imp import --- umi_tools/umi_tools.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/umi_tools/umi_tools.py b/umi_tools/umi_tools.py index 4a507e6f..471e707e 100644 --- a/umi_tools/umi_tools.py +++ b/umi_tools/umi_tools.py @@ -28,7 +28,7 @@ from __future__ import absolute_import import os import sys -import imp +import importlib from umi_tools import __version__ @@ -55,8 +55,7 @@ def main(argv=None): command = argv[1] - (file, pathname, description) = imp.find_module(command, [path, ]) - 
module = imp.load_module(command, file, pathname, description) + module = importlib.import_module("umi_tools." + command, "umi_tools") # remove 'umi-tools' from sys.argv del sys.argv[0] module.main(sys.argv)