Skip to content

Commit

Permalink
star: Add optional arguments to control memory usage, related to #134
Browse files Browse the repository at this point in the history
With the arguments genomeSAsparseD and genomeSAindexNbases, one can control STAR's memory requirements and usage.
  • Loading branch information
tomazc committed Sep 20, 2017
1 parent 915c976 commit ef20687
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 8 deletions.
4 changes: 2 additions & 2 deletions iCount/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _extract_parameter_data(function):
Every parameter in returned object can have the following entries:
* name - the name of parameter, preceeded by '--' if it is optional
* name - the name of parameter, preceded by '--' if it is optional
* default - the default value (only for optional parameters). Extracted
from function signature.
* type - type of parameter, extracted from function docstring. If not
Expand Down Expand Up @@ -391,7 +391,7 @@ def verbose_help(mode):

# all_args command:
def all_args():
"""Print all posssible parameter names and CLI commands where they are used."""
"""Print all possible parameter names and CLI commands where they are used."""
for param_name, commands in sorted(PARAMETERS.items(), key=lambda x: x[0].lstrip('-')):
if param_name in SHORT_OPTARG_NAMES:
short_name = ' ({})'.format(SHORT_OPTARG_NAMES[param_name])
Expand Down
10 changes: 5 additions & 5 deletions iCount/examples/tutorial.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ set -vx
mkdir tutorial_example
cd tutorial_example

iCount releases
iCount releases --source ensembl

iCount species -r 88
iCount species --source ensembl -r 88

iCount genome homo_sapiens -r 88 --chromosomes 21 MT
iCount genome --source ensembl homo_sapiens 88 --chromosomes 21 MT

iCount annotation homo_sapiens -r 88
iCount annotation --source ensembl homo_sapiens 88

mkdir hs88
iCount indexstar homo_sapiens.88.chr21_MT.fa.gz hs88 --annotation homo_sapiens.88.gtf.gz
iCount indexstar homo_sapiens.88.chr21_MT.fa.gz hs88 --annotation homo_sapiens.88.gtf.gz --genomeSAsparseD 2 --genomeSAindexNbases 15

# the whole data set [880 MB] is available here:
#wget http://icount.fri.uni-lj.si/data/20101116_LUjh03/\
Expand Down
12 changes: 11 additions & 1 deletion iCount/externals/star.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def get_version():
return None


def build_index(genome, genome_index, annotation='', overhang=100, overhang_min=8, threads=1):
def build_index(genome, genome_index, annotation='', overhang=100, overhang_min=8, threads=1,
genomeSAsparseD=1, genomeSAindexNbases=14):
"""
Call STAR to generate genome index, which is used for mapping.
Expand All @@ -77,6 +78,13 @@ def build_index(genome, genome_index, annotation='', overhang=100, overhang_min=
TODO
threads : int
Number of threads that STAR can use for generating index.
genomeSAsparseD : int
Suffix array sparsity. Bigger numbers decrease RAM requirements
at the cost of mapping speed reduction. Suggested values
are 1 (30 GB RAM) or 2 (16 GB RAM).
genomeSAindexNbases : int
SA pre-indexing string length, typically between 10 and 15.
Longer strings require more memory, but result in faster searches.
Returns
-------
Expand All @@ -95,6 +103,8 @@ def build_index(genome, genome_index, annotation='', overhang=100, overhang_min=
args = [
'STAR',
'--runThreadN', '{:d}'.format(threads),
'--genomeSAsparseD', '{:d}'.format(genomeSAsparseD),
'--genomeSAindexNbases', '{:d}'.format(genomeSAindexNbases),
'--runMode', 'genomeGenerate',
'--genomeDir', '{:s}'.format(genome_index),
'--genomeFastaFiles', '{:s}'.format(genome_fname2),
Expand Down

0 comments on commit ef20687

Please sign in to comment.