Skip to content

Commit

Permalink
small bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
pchaumeil committed Apr 3, 2018
1 parent 670a045 commit a0549f5
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 14 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ GTDB-Tk makes use of the following 3rd party dependencies and assumes these are
* [FastANI](https://github.com/ParBLiSS/FastANI) >= 1.0: Jain C, et al. 2017. High-throughput ANI Analysis of 90K Prokaryotic Genomes Reveals Clear Species Boundaries. <i>bioRxiv.</i> 256800.
* [FastTree](http://www.microbesonline.org/fasttree/) >= 2.1.9: Price MN, et al. 2010 FastTree 2 -- Approximately Maximum-Likelihood Trees for Large Alignments. <i>PLoS ONE</i>, 5, e9490.

GTDB-Tk also assumes the Python 2.7.x and Perl interpreters are on your system path.
GTDB-Tk also assumes the Python 2.7.x and Perl interpreters are on your system path.
_NOTE_: The Perl interpreter requires the Moose and IPC::Run modules. Make sure that the folder where the Perl modules (*.pm) are located is part of the @INC variable.
If it is not, you can set the PERL5LIB (or PERLLIB) environment variable the same way you set the PATH environment variable. Every directory listed in this variable will be added to @INC.
For example:
```
export PERL5LIB=/path/to/moose/directory
```

GTDB-Tk requires ~70G+ of external data that need to be downloaded and unarchived (preferably in the same directory):
```
Expand Down
6 changes: 6 additions & 0 deletions gtdbtk/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
0.0.5
- stable version for pip
0.0.4b3
- fastANI dependency fix
0.0.4b2
- fastANI dependency fix
0.0.4b1
- FastAni comparison bug fixing
0.0.4-beta
Expand Down
8 changes: 4 additions & 4 deletions gtdbtk/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Classify():
def __init__(self, cpus=1):
"""Initialize."""

check_dependencies(['pplacer', 'guppy', 'fastani'])
check_dependencies(['pplacer', 'guppy', 'fastANI'])

self.taxonomy_file = Config.TAXONOMY_FILE

Expand Down Expand Up @@ -93,7 +93,7 @@ def place_genomes(self,
pplacer_json_out,
user_msa_file,
pplacer_out)
#os.system(cmd)
os.system(cmd)

# extract tree
tree_file = os.path.join(out_dir, prefix + ".%s.classify.tree" % marker_set_id)
Expand Down Expand Up @@ -204,6 +204,8 @@ def run(self,
if Config.FASTANI_SPECIES_THRESHOLD < v.get("ani"):
suffixed_name = add_ncbi_prefix(v.get("ref_genome"))
taxa_str = ";".join(gtdb_taxonomy.get(suffixed_name))
if taxa_str.endswith("s__"):
taxa_str = taxa_str+v.get("ref_genome")
fout.write('%s\t%s\n' % (k, taxa_str))
fastani_dict[k]=v
redfout.write("{0}\tani\tNone\n".format(k))
Expand Down Expand Up @@ -585,15 +587,13 @@ def _calculate_fastani_distance(self,list_leaf,genomes):
cmd = 'fastANI --ql {0} --rl {1} -o {2} > /dev/null 2>&1'.format(os.path.join(self.tmp_output_dir, 'query_list.txt'),
os.path.join(self.tmp_output_dir, 'ref_list.txt'),
os.path.join(self.tmp_output_dir, 'results.tab'))
#print cmd
os.system(cmd)

if not os.path.isfile(os.path.join(self.tmp_output_dir,'results.tab')):
raise

dict_parser_distance = self._parse_fastani_results(os.path.join(self.tmp_output_dir,'results.tab'),list_leaf)
shutil.rmtree(self.tmp_output_dir)
#print len(dict_parser_distance)
return dict_parser_distance

except:
Expand Down
2 changes: 1 addition & 1 deletion gtdbtk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def parse_options(self, options):

self.decorate(options)
elif(options.subparser_name == 'classify_wf'):
check_dependencies(['prodigal', 'hmmalign', 'pplacer', 'guppy', 'fastani'])
check_dependencies(['prodigal', 'hmmalign', 'pplacer', 'guppy', 'fastANI'])
self.identify(options)

options.identify_dir = options.out_dir
Expand Down
18 changes: 10 additions & 8 deletions scripts/trim_msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(self):

def run(self, msa, mask, marker_list, taxonomy_file,metadata_file, output):
dict_marker ={}
print "readmsa"
dict_genomes = read_fasta(msa,False)

print len(dict_genomes)
Expand Down Expand Up @@ -99,18 +100,19 @@ def selectGenomes(self,list_genomes,taxonomy_file,metadata_file):
dict_metadata = {}
with open(metadata_file,'r') as mf:
headers= mf.readline().strip().split("\t")
ncbi_type_strain_index = headers.index('ncbi_type_strain')
ncbi_type_strain_index = headers.index('ncbi_type_material')
checkm_completeness_index = headers.index('checkm_completeness')
checkm_contamination_index = headers.index('checkm_contamination')

for line in mf:
info = line.split('\t')
quality = float(info[checkm_completeness_index]) - 5*float(info[checkm_contamination_index])
if info[ncbi_type_strain_index] != '' and info[ncbi_type_strain_index] != 'yes' and info[ncbi_type_strain_index] != 'none':
print info[0]
print info[ncbi_type_strain_index]
sys.exit()
dict_metadata[info[0]] = { "strain":info[ncbi_type_strain_index],
print info[ncbi_type_strain_index]
tm = False
if info[ncbi_type_strain_index] == 't':
tm = True

dict_metadata[info[0]] = { "strain":tm,
"quality":quality
}

Expand All @@ -126,9 +128,9 @@ def selectGenomes(self,list_genomes,taxonomy_file,metadata_file):
else:
oldgid = dictgenusspecies.get(genusspecies)

if dict_metadata[oldgid]['strain'] =='yes' and (dict_metadata[gid]['strain'] =='' or dict_metadata[gid]['strain'] == 'none'):
if dict_metadata[oldgid]['strain'] is True and dict_metadata[gid]['strain'] is False:
continue
if dict_metadata[gid]['strain'] =='yes' and (dict_metadata[oldgid]['strain'] =='' or dict_metadata[oldgid]['strain'] == 'none'):
if dict_metadata[gid]['strain'] is True and dict_metadata[oldgid]['strain'] is False:
dictgenusspecies[genusspecies] = gid
elif dict_metadata[gid]['quality'] >= dict_metadata[oldgid]['quality']:
dictgenusspecies[genusspecies] = gid
Expand Down

0 comments on commit a0549f5

Please sign in to comment.