-
Notifications
You must be signed in to change notification settings - Fork 6
/
set_kemet_working-directory.py
executable file
·134 lines (110 loc) · 4.26 KB
/
set_kemet_working-directory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python
# coding: utf-8
import argparse
import os
import subprocess
from os import path
#import shutil
#from kemet_data import project_dir
def set_directories(dir_base, gapfill_usage=False):
    """
    Setup function to generate folders and instruction files for
    KEMET script execution.

    The KEGG organisms hierarchy ("br08601.keg") is fetched with ``curl``.
    A failed download (curl missing, no network, HTTP error) is reported and
    does not abort the rest of the setup; a previously downloaded file is
    left untouched in that case.

    Args:
        dir_base (str): folder path in which "kemet.py" is going to be executed
        gapfill_usage (bool, optional): flag to indicate that GSMMs functionalities are wanted.
            Defaults to False.
    """
    directories_to_make = [
        "KEGG_annotations",
        "ktests",
        "klists",
        "reports_txt",
        "reports_tsv",
        "taxonomies",
        "Knumber_ntsequences",
        "multiple_fasta",
        "HMM",
        "HMM_HITS",
        "genomes",
        "oneBM_modules",
    ]
    if gapfill_usage:
        directories_to_make += [
            "report_gapfill",
            "biggapi_download",
            "models",
            "models_gapfilled",
            "de_novo_models",
        ]

    genome_instruction_file = path.join(dir_base, "genomes.instruction")
    module_file = path.join(dir_base, "module_file.instruction")
    ko_file = path.join(dir_base, "ko_file.instruction")
    kegg_brite_organisms = path.join(dir_base, "br08601.keg")

    # curl is invoked with an argument list (no shell), so the command cannot
    # be altered by spaces or metacharacters in dir_base. The payload is
    # captured first and written only on success, so a failed download never
    # truncates an existing copy of the hierarchy file.
    try:
        download = subprocess.run(
            [
                "curl",
                "--silent",
                "--show-error",
                "--fail",
                "https://rest.kegg.jp/get/br:br08601",
            ],
            stdout=subprocess.PIPE,
            check=False,
        )
    except OSError:
        # curl is not installed or cannot be executed
        hierarchy = b""
    else:
        hierarchy = download.stdout if download.returncode == 0 else b""

    if hierarchy:
        with open(kegg_brite_organisms, "wb") as f:
            f.write(hierarchy)
        print("KEGG Organisms hierarchy DOWNLOADED")
    else:
        print("KEGG Organisms hierarchy DOWNLOAD FAILED")

    if path.isfile(genome_instruction_file):
        print("Instruction file ALREADY EXISTS")
    else:
        with open(genome_instruction_file, "w") as f:
            print("id", "taxonomy", "universe", sep="\t", file=f)
        print("genome_instruction file GENERATED")

    # Empty placeholder files; open() is used instead of os.mknod because
    # os.mknod is only available on Unix.
    for label, empty_file in (("module_file", module_file), ("ko_file", ko_file)):
        if path.isfile(empty_file):
            print(f"{label} ALREADY EXISTS")
        else:
            with open(empty_file, "a"):
                pass
            print(f"{label} GENERATED")

    for el in directories_to_make:
        target_dir = path.join(dir_base, el)
        if path.isdir(target_dir):
            print(f"{target_dir} folder ALREADY EXISTS")
        else:
            os.mkdir(target_dir)
            print(f"{target_dir} folder CREATED")
#def copy_ref_kegg_modules_and_DB(project_dir, dir_base):
# if not path.isdir("kemet_data"):
# os.mkdir("kemet_data")
# os.chdir("kemet_data")
# shutil.copytree(project_dir, path.join(dir_base, "kemet_data"))
def set_kk_database():
    """Placeholder: currently performs no action and returns None."""
    return None
def update_kk_database():
    """Placeholder: currently performs no action and returns None."""
    return None
###############################################################################
def main():
    """
    Command line entry point.

    Parses the command line switches, prepares the working directory in the
    current folder through set_directories(), then calls set_kk_database()
    when -k is given, or update_kk_database() when only -u is given.
    """
    cli = argparse.ArgumentParser(
        description=(
            "\n"
            "Base command for setting KEMET package working directory.\n"
            "Create folders and instruction files; helper function to manage KEGG MODULE .kk database.\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Add command line option for setting the base directory?
    # Every switch is a plain boolean flag: (short name, long name, help text).
    switches = (
        (
            "-k",
            "--set_kk_DB",
            "\n"
            "Choose this option to generate KEGG Module DB (.kk files),\n"
            "in order to perform KEGG Modules Completeness evaluation.\n"
            "Default: already generated",
        ),
        (
            "-u",
            "--update_kk_DB",
            "\n"
            "Choose this option to update already existing KEGG Module DB (.kk files).",
        ),
        (
            "-G",
            "--gapfill_usage",
            "\n"
            "Choose this option to create required folders for the GSMM Gapfilling,\n"
            "follow-up of the HMM search procedures.",
        ),
    )
    for short_name, long_name, help_text in switches:
        cli.add_argument(short_name, long_name, action="store_true", help=help_text)
    options = cli.parse_args()

    ###############################################################################
    # The working directory is always the folder the script is launched from.
    set_directories(os.getcwd(), options.gapfill_usage)

    #POSSIBILITY: ALLOW DYNAMIC kk-files DATABASE UPDATES
    # -k takes precedence: when both switches are given only the set step runs.
    if options.set_kk_DB:
        set_kk_database()
        return
    if options.update_kk_DB:
        update_kk_database()

    #if not "kemet_data" in os.listdir() and not "KEGG_MODULES" in os.listdir():
    #    copy_ref_kegg_modules_and_DB(project_dir, dir_base)
# Script entry point: run the setup only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()