Skip to content

Commit

Permalink
Updates saving of output files for querying
Browse files Browse the repository at this point in the history
  • Loading branch information
samhorsfield96 committed Oct 17, 2024
1 parent 12983c9 commit b91fc7d
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 15 deletions.
22 changes: 14 additions & 8 deletions ggCaller/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ def get_options():
default=False,
help='Save graph objects for sequence querying. '
'[Default = False] ')
IO.add_argument('--data',
IO.add_argument('--prev-run',
default=None,
help='Directory containing data from previous ggCaller run generated via "--save" ')
help='Directory containing data from previous ggCaller run. Must have been run with "--save" ')
IO.add_argument(
"--all-seq-in-graph",
dest="all_seq_in_graph",
Expand Down Expand Up @@ -400,6 +400,9 @@ def main():
Path_dir = os.path.join(output_dir, "Path_dir")
if not os.path.exists(Path_dir):
os.mkdir(Path_dir)

# ensure trailing slash present
Path_dir = os.path.join(Path_dir, "")

# if build graph specified, build graph and then call ORFs
if (options.graph is not None) and (options.colours is not None) and (options.query is None):
Expand All @@ -408,10 +411,10 @@ def main():
# query unitigs in previous saved ggc graph
elif (options.graph is not None) and (options.colours is not None) and (options.refs is None) and \
(options.query is not None):
if options.data is None:
print("Please specify a ggc_data directory from a previous ggCaller run.")
if options.prev_run is None:
print("Please specify a directory from a previous ggCaller run.")
sys.exit(1)
search_graph(graph, options.graph, options.colours, options.query, options.data, output_dir, options.query_id,
search_graph(graph, options.graph, options.colours, options.query, options.prev_run, output_dir, options.query_id,
options.threads)
print("Finished.")
sys.exit(0)
Expand Down Expand Up @@ -502,6 +505,12 @@ def main():
ORFMap_dir = os.path.join(output_dir, "ORF_dir")
if not os.path.exists(ORFMap_dir):
os.mkdir(ORFMap_dir)

# ensure trailing slash present
ORFMap_dir = os.path.join(ORFMap_dir, "")

# save the kmer_array to ORFMap_dir
graph[0].data_out(ORFMap_dir + "kmer_array.dat")

# load models if required
if not options.no_filter:
Expand Down Expand Up @@ -552,9 +561,6 @@ def main():
# make sure trailing forward slash is present
objects_dir = os.path.join(objects_dir, "")

# serialise graph object and high scoring ORFs for future reading
graph[0].data_out(objects_dir + "ggc_graph.dat")

# create index of all high_scoring_ORFs node_IDs
node_index = defaultdict(list)
for colour_ID, file_path in ORF_file_paths.items():
Expand Down
10 changes: 5 additions & 5 deletions src/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ std::pair<std::map<size_t, std::string>, std::map<size_t, std::string>> Graph::f
{
const std::string base_filename = FM_fasta_file.substr(FM_fasta_file.find_last_of("/\\") + 1);

const auto idx_file_name = path_dir + "/" + base_filename + ".fmp";
const auto idx_file_name = path_dir + base_filename + ".fmp";
if (!load_from_file(fm_idx, idx_file_name))
{
cout << "FM-Index not available for " << FM_fasta_file << endl;
Expand All @@ -338,7 +338,7 @@ std::pair<std::map<size_t, std::string>, std::map<size_t, std::string>> Graph::f

// write ORF_map file
{
std::string ORF_file_path = tmp_dir + "/colour_" + std::to_string(colour_ID) + "_ORFs.tmp";
std::string ORF_file_path = tmp_dir + "colour_" + std::to_string(colour_ID) + "_ORFs.tmp";
std::ofstream ofs(ORF_file_path);
boost::archive::text_oarchive oa(ofs);
// write class instance to archive
Expand Down Expand Up @@ -622,7 +622,7 @@ std::pair<std::map<size_t, std::string>, std::map<size_t, std::string>> Graph::f
{
const std::string base_filename = FM_fasta_file.substr(FM_fasta_file.find_last_of("/\\") + 1);

const auto idx_file_name = path_dir + "/" + base_filename + ".fmp";
const auto idx_file_name = path_dir + base_filename + ".fmp";
if (!load_from_file(fm_idx, idx_file_name))
{
cout << "FM-Index not available for " << FM_fasta_file << endl;
Expand Down Expand Up @@ -768,7 +768,7 @@ std::pair<std::map<size_t, std::string>, std::map<size_t, std::string>> Graph::f
}

{
std::string file_path = tmp_dir + "/colour_" + std::to_string(colour_ID) + "_edges.tmp";
std::string file_path = tmp_dir + "colour_" + std::to_string(colour_ID) + "_edges.tmp";
std::ofstream ofs(file_path);
boost::archive::text_oarchive oa(ofs);
// write class instance to archive
Expand Down Expand Up @@ -824,7 +824,7 @@ std::pair<RefindMap, bool> Graph::refind_gene(const size_t& colour_ID,
{
const std::string base_filename = FM_fasta_file.substr(FM_fasta_file.find_last_of("/\\") + 1);

const auto idx_file_name = path_dir + "/" + base_filename + ".fmp";
const auto idx_file_name = path_dir + base_filename + ".fmp";
if (!load_from_file(fm_idx, idx_file_name))
{
cout << "FM-Index not available for " << FM_fasta_file << endl;
Expand Down
2 changes: 1 addition & 1 deletion src/indexing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ void calculate_genome_paths(const std::vector<Kmer>& head_kmer_arr,
// create fm index file name
const std::string base_filename = fasta_file.substr(fasta_file.find_last_of("/\\") + 1);

const auto idx_file_name = path_dir + "/" + base_filename + ".fmp";
const auto idx_file_name = path_dir + base_filename + ".fmp";

// initialise string of nodes for FM-index generation
std::string genome_path;
Expand Down
2 changes: 1 addition & 1 deletion src/match_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ std::pair<fm_index_coll, std::vector<size_t>> index_fasta(const std::string& fas
// create fm index file name
const std::string base_filename = fasta_file.substr(fasta_file.find_last_of("/\\") + 1);

std::string idx_file_name = path_dir + "/" + base_filename + ".fms";
std::string idx_file_name = path_dir + base_filename + ".fms";

// create entry for start and end of contigs within fm_index
std::vector<size_t> contig_locs;
Expand Down

0 comments on commit b91fc7d

Please sign in to comment.