forked from chapmanb/cloudbiolinux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_fabfile.py
93 lines (76 loc) · 3.1 KB
/
data_fabfile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""Fabric deployment file to install genomic data on remote instances.
Designed to automatically download and manage biologically associated
data on cloud instances like Amazon EC2.
Fabric (http://docs.fabfile.org) manages automation of remote servers.
Usage:
fab -i key_file -H servername -f data_fabfile.py install_data
"""
import os
import sys
from fabric.main import load_settings
from fabric.api import *
from fabric.contrib.files import *
from fabric.context_managers import path
try:
import boto
except ImportError:
boto = None
# preferentially use local cloudbio directory
for to_remove in [p for p in sys.path if p.find("cloudbiolinux-") > 0]:
sys.path.remove(to_remove)
sys.path.append(os.path.dirname(__file__))
from cloudbio.utils import _setup_logging, _configure_fabric_environment
from cloudbio.biodata import genomes
# -- Host specific setup
env.remove_old_genomes = False
def setup_environment():
"""Setup environment with required data file locations.
"""
_setup_logging(env)
_add_defaults()
_configure_fabric_environment(env, ignore_distcheck=True)
def _add_defaults():
"""Defaults from fabricrc.txt file; loaded if not specified at commandline.
"""
env.config_dir = os.path.join(os.path.dirname(__file__), "config")
conf_file = "tool_data_table_conf.xml"
env.tool_data_table_conf_file = os.path.join(os.path.dirname(__file__),
"installed_files", conf_file)
if not env.has_key("distribution"):
config_file = os.path.join(env.config_dir, "fabricrc.txt")
if os.path.exists(config_file):
env.update(load_settings(config_file))
CONFIG_FILE = os.path.join(os.path.dirname(__file__), "config", "biodata.yaml")
def install_data(config_source=CONFIG_FILE):
"""Main entry point for installing useful biological data.
"""
setup_environment()
genomes.install_data(config_source)
def install_data_raw(config_source=CONFIG_FILE):
"""Installing useful biological data building from scratch. Useful for debugging.
"""
setup_environment()
genomes.install_data(config_source, approaches=["raw"])
def install_data_s3(config_source=CONFIG_FILE, do_setup_environment=True):
"""Install data using pre-existing genomes present on Amazon s3.
"""
setup_environment()
genomes.install_data_s3(config_source)
def install_data_rsync(config_source=CONFIG_FILE):
"""Install data using Galaxy rsync data servers.
"""
setup_environment()
genomes.install_data_rsync(config_source)
def install_data_ggd(recipe, organism):
"""Install data using Get Genomics Data (GGD) recipes.
"""
setup_environment()
from cloudbio.biodata import ggd, genomes
genome_dir = os.path.join(genomes._make_genome_dir(), organism)
recipe_file = os.path.join(os.path.dirname(__file__), "ggd-recipes", organism, "%s.yaml" % recipe)
ggd.install_recipe(genome_dir, env, recipe_file, organism)
def upload_s3(config_source=CONFIG_FILE):
"""Upload prepared genome files by identifier to Amazon s3 buckets.
"""
setup_environment()
genomes.upload_s3(config_source)