diff --git a/ggCaller/__main__.py b/ggCaller/__main__.py index 3d65e4e..47275ee 100644 --- a/ggCaller/__main__.py +++ b/ggCaller/__main__.py @@ -59,9 +59,9 @@ def get_options(): default=False, help='Save graph objects for sequence querying. ' '[Default = False] ') - IO.add_argument('--data', + IO.add_argument('--prev-run', default=None, - help='Directory containing data from previous ggCaller run generated via "--save" ') + help='Directory containing data from previous ggCaller run. Must have been run with "--save" ') IO.add_argument( "--all-seq-in-graph", dest="all_seq_in_graph", @@ -400,6 +400,9 @@ def main(): Path_dir = os.path.join(output_dir, "Path_dir") if not os.path.exists(Path_dir): os.mkdir(Path_dir) + + # ensure trailing slash present + Path_dir = os.path.join(Path_dir, "") # if build graph specified, build graph and then call ORFs if (options.graph is not None) and (options.colours is not None) and (options.query is None): @@ -408,10 +411,10 @@ def main(): # query unitigs in previous saved ggc graph elif (options.graph is not None) and (options.colours is not None) and (options.refs is None) and \ (options.query is not None): - if options.data is None: - print("Please specify a ggc_data directory from a previous ggCaller run.") + if options.prev_run is None: + print("Please specify a directory from a previous ggCaller run.") sys.exit(1) - search_graph(graph, options.graph, options.colours, options.query, options.data, output_dir, options.query_id, + search_graph(graph, options.graph, options.colours, options.query, options.prev_run, output_dir, options.query_id, options.threads) print("Finished.") sys.exit(0) @@ -502,6 +505,12 @@ def main(): ORFMap_dir = os.path.join(output_dir, "ORF_dir") if not os.path.exists(ORFMap_dir): os.mkdir(ORFMap_dir) + + # ensure trailing slash present + ORFMap_dir = os.path.join(ORFMap_dir, "") + + # save the kmer_array to ORFMap_dir + graph[0].data_out(ORFMap_dir + "kmer_array.dat") # load models models if required if not options.no_filter: @@ -552,9 +561,6 @@ def main(): # make sure trailing forward slash is present objects_dir = os.path.join(objects_dir, "") - # serialise graph object and high scoring ORFs to future reading - graph[0].data_out(objects_dir + "ggc_graph.dat") - # create index of all high_scoring_ORFs node_IDs node_index = defaultdict(list) for colour_ID, file_path in ORF_file_paths.items(): diff --git a/src/graph.cpp b/src/graph.cpp index 64d69bb..bdec43c 100644 --- a/src/graph.cpp +++ b/src/graph.cpp @@ -314,7 +314,7 @@ std::pair, std::map> Graph::f { const std::string base_filename = FM_fasta_file.substr(FM_fasta_file.find_last_of("/\\") + 1); - const auto idx_file_name = path_dir + "/" + base_filename + ".fmp"; + const auto idx_file_name = path_dir + base_filename + ".fmp"; if (!load_from_file(fm_idx, idx_file_name)) { cout << "FM-Index not available for " << FM_fasta_file << endl; @@ -338,7 +338,7 @@ std::pair, std::map> Graph::f // write ORF_map file { - std::string ORF_file_path = tmp_dir + "/colour_" + std::to_string(colour_ID) + "_ORFs.tmp"; + std::string ORF_file_path = tmp_dir + "colour_" + std::to_string(colour_ID) + "_ORFs.tmp"; std::ofstream ofs(ORF_file_path); boost::archive::text_oarchive oa(ofs); // write class instance to archive @@ -622,7 +622,7 @@ std::pair, std::map> Graph::f { const std::string base_filename = FM_fasta_file.substr(FM_fasta_file.find_last_of("/\\") + 1); - const auto idx_file_name = path_dir + "/" + base_filename + ".fmp"; + const auto idx_file_name = path_dir + base_filename + ".fmp"; if (!load_from_file(fm_idx, idx_file_name)) { cout << "FM-Index not available for " << FM_fasta_file << endl; @@ -768,7 +768,7 @@ std::pair, std::map> Graph::f } { - std::string file_path = tmp_dir + "/colour_" + std::to_string(colour_ID) + "_edges.tmp"; + std::string file_path = tmp_dir + "colour_" + std::to_string(colour_ID) + "_edges.tmp"; std::ofstream ofs(file_path); boost::archive::text_oarchive oa(ofs); // write class instance to archive @@ -824,7 +824,7 @@ std::pair Graph::refind_gene(const size_t& colour_ID, { const std::string base_filename = FM_fasta_file.substr(FM_fasta_file.find_last_of("/\\") + 1); - const auto idx_file_name = path_dir + "/" + base_filename + ".fmp"; + const auto idx_file_name = path_dir + base_filename + ".fmp"; if (!load_from_file(fm_idx, idx_file_name)) { cout << "FM-Index not available for " << FM_fasta_file << endl; diff --git a/src/indexing.cpp b/src/indexing.cpp index caa0a51..c822a8a 100644 --- a/src/indexing.cpp +++ b/src/indexing.cpp @@ -402,7 +402,7 @@ void calculate_genome_paths(const std::vector& head_kmer_arr, // create fm index file name const std::string base_filename = fasta_file.substr(fasta_file.find_last_of("/\\") + 1); - const auto idx_file_name = path_dir + "/" + base_filename + ".fmp"; + const auto idx_file_name = path_dir + base_filename + ".fmp"; // initialise string of nodes for FM-index generation std::string genome_path; diff --git a/src/match_string.cpp b/src/match_string.cpp index e450855..82de1b1 100644 --- a/src/match_string.cpp +++ b/src/match_string.cpp @@ -19,7 +19,7 @@ std::pair> index_fasta(const std::string& fas // create fm index file name const std::string base_filename = fasta_file.substr(fasta_file.find_last_of("/\\") + 1); - std::string idx_file_name = path_dir + "/" + base_filename + ".fms"; + std::string idx_file_name = path_dir + base_filename + ".fms"; // create entry for start and end of contigs within fm_index std::vector contig_locs;