emirge_amplicon.py use now controlled by -amplimit switch

HRGV · Jul 22, 2015 · 1f50709 · 1f50709
1 parent 83317a1
commit 1f50709
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -32,8 +32,8 @@ Python installed. (OSX is for the brave, we have not tested this!)
    here https://github.com/HRGV/phyloFlash/releases
 
    ```bash
-   wget https://github.com/HRGV/phyloFlash/archive/v2.0-beta3.tar.gz  
-   tar -xzf v2.0-beta3.tar.gz
+   wget https://github.com/HRGV/phyloFlash/archive/v2.0-beta4.tar.gz  
+   tar -xzf v2.0-beta4.tar.gz
    ```
 
 2. Install the tools phyloFlash uses:

diff --git a/phyloFlash.pl b/phyloFlash.pl
@@ -2,7 +2,7 @@
 =head1 NAME
 
 phyloFlash - A script to rapidly estimate the phylogenetic composition of
-             an illumina (meta)genomic dataset.
+             an Illumina (meta)genomic dataset and reconstruct SSU rRNA genes.
 
 =head1 SYNOPSIS
 
@@ -65,6 +65,13 @@ =head1 OPTIONS
 for transcriptomes with a lot of rRNA reads (use values <1000000).
 Default: unlimited
 
+=item -amplimit I<N>
+
+Sets the number of SSU read pairs above which phyloFlash switches from
+emirge.py to emirge_amplicon.py. This feature is not reliable, as
+emirge_amplicon.py has been problematic to run (use values >100000).
+Default: 500000
+
 =item -id I<N>
 
 Minimum allowed identity for read mapping process in %. Must be within
@@ -167,6 +174,7 @@ =head1 COPYRIGHT AND LICENSE
 my $id          = 70;           # minimum %id for mapping
 my $readlength  = 100;          # length of input reads
 my $readlimit   = -1;           # max # of reads to use
+my $amplimit   = 500000;        # number of SSU pairs at which to switch to emirge_amplicon
 my $maxinsert   = 1200;         # max insert size for paired end read mapping
 my $cpus        = get_cpus      # num cpus to use
 my $clusterid   = 97;           # threshold for vsearch clustering
@@ -241,6 +249,7 @@ sub parse_cmdline {
                'dbhome=s' => \$DBHOME,
                'readlength=i' => \$readlength,
                'readlimit=i' => \$readlimit,
+	       'amplimit=i' => \$amplimit,
                'maxinsert=i' => \$maxinsert,
                'id=i' => \$id,
                'clusterid=i' => \$clusterid,
@@ -824,11 +833,11 @@ sub emirge_run {
 
         msg("the insert size used is $ins_used +- $ins_std");
         # FIXME: EMIRGE dies with too many SSU reads, the cutoff needs to be adjusted...
-        if ($SSU_total_pairs < 150000) {
-            msg("Less than 300k SSU reads - using Emirge");
+        if ($SSU_total_pairs <= $amplimit) {
+            msg("Less than $amplimit SSU read pairs - using Emirge");
         } else {
             $cmd = "emirge_amp";
-            msg("Warning: More than 25k SSU reads - using Emirge Amplicon");
+            msg("Warning: More than $amplimit SSU reads - using Emirge Amplicon");
         }
 
         $args = "  -1 $libraryNAME.$readsf.SSU.1.fq "