Skip to content

Commit

Permalink
parameterize rgfa sample name
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed May 10, 2023
1 parent 42234d6 commit 2b38d5a
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 10 deletions.
8 changes: 4 additions & 4 deletions src/gfa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ static bool should_write_as_w_line(const PathHandleGraph* graph, path_handle_t p
static void write_w_line(const PathHandleGraph* graph, ostream& out, path_handle_t path_handle, unordered_map<tuple<string, int64_t, string>, size_t>& last_phase_block_end);

void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set<string>& rgfa_paths,
bool rgfa_pline, bool use_w_lines) {
bool rgfa_pline, bool use_w_lines, const string& rgfa_sample_name) {

// TODO: Support sorting nodes, paths, and/or edges for canonical output
// TODO: Use a NamedNodeBackTranslation (or forward translation?) to properly round-trip GFA that has had to be chopped.
Expand Down Expand Up @@ -335,7 +335,7 @@ string create_rgfa_path_name(const string& path_name, int rgfa_rank, const subra
PathMetadata::NO_PHASE_BLOCK, rgfa_subrange);
}

string strip_rgfa_path_name(const string& path_name, const string& rgfa_sample) {
string strip_rgfa_path_name(const string& path_name) {

PathSense path_sense;
string path_sample;
Expand All @@ -352,7 +352,6 @@ string strip_rgfa_path_name(const string& path_name, const string& rgfa_sample)
if (sr_pos != string::npos && path_locus.length() - sr_pos >= 6) {
size_t sn_pos = path_locus.rfind("SN:Z:", sr_pos - 1);
assert(sn_pos != string::npos);
assert(path_sample == rgfa_sample);

string orig_sample;
if (sn_pos > 0) {
Expand All @@ -376,6 +375,7 @@ void rgfa_graph_cover(MutablePathMutableHandleGraph* graph,
SnarlManager* snarl_manager,
const unordered_set<path_handle_t>& reference_paths,
int64_t minimum_length,
const string& rgfa_sample_name,
const unordered_map<string, vector<pair<int64_t, int64_t>>>& preferred_intervals){

// for sanity's sake, we don't want to ever support multiple rgfa covers, so start by
Expand Down Expand Up @@ -499,7 +499,7 @@ void rgfa_graph_cover(MutablePathMutableHandleGraph* graph,
subrange_t rgfa_frag_subrange = graph->get_subrange(path_handle);
rgfa_frag_subrange.first = rgfa_frag_pos + (rgfa_frag_subrange != PathMetadata::NO_SUBRANGE ? rgfa_frag_subrange.first : 0);
rgfa_frag_subrange.second = rgfa_frag_subrange.first + rgfa_frag_length;
string rgfa_frag_name = create_rgfa_path_name(path_name, rgfa_rank, rgfa_frag_subrange);
string rgfa_frag_name = create_rgfa_path_name(path_name, rgfa_rank, rgfa_frag_subrange, rgfa_sample_name);

#ifdef debug
#pragma omp critical(cerr)
Expand Down
8 changes: 5 additions & 3 deletions src/gfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ using namespace std;
void graph_to_gfa(const PathHandleGraph* graph, ostream& out,
const set<string>& rgfa_paths = {},
bool rgfa_pline = false,
bool use_w_lines = true);
bool use_w_lines = true,
const string& rgfa_sample_name = "");


/// Prototype code to tag paths as rGFA paths. Either needs to be completely scrapped
Expand All @@ -52,10 +53,10 @@ int get_rgfa_rank(const string& path_name, const string& rgfa_sample="_rGFA_");
/// Add the rgfa rank to a pathname, also setting its sample to the special rgfa sample and
/// moving its old sample into the locus field
string create_rgfa_path_name(const string& path_name, int rgfa_rank, const subrange_t& subrange,
const string& rgfa_sample="_rGFA_");
const string& rgfa_sample);

/// Remove the rGFA information from a path name, effectively undoing create_rgfa_path_name
string strip_rgfa_path_name(const string& path_name, const string& rgfa_sample="_rGFA_");
string strip_rgfa_path_name(const string& path_name);

/// Compute the rGFA path cover
/// graph: the graph
Expand All @@ -67,6 +68,7 @@ void rgfa_graph_cover(MutablePathMutableHandleGraph* graph,
SnarlManager* snarl_manager,
const unordered_set<path_handle_t>& reference_paths,
int64_t minimum_length,
const string& rgfa_sample_name,
const unordered_map<string, vector<pair<int64_t, int64_t>>>& preferred_intervals = {});

void rgfa_snarl_cover(const PathHandleGraph* graph,
Expand Down
13 changes: 10 additions & 3 deletions src/subcommand/paths_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ void help_paths(char** argv) {
<< " -r, --retain-paths output a graph with only the selected paths retained" << endl
<< " rGFA cover" << endl
<< " -R, --rgfa-min-length N add rGFA cover to graph, using selection from -Q/-S as rank-0 backbone, only adding fragments >= Nbp (default:-1=disabled)" << endl
<< " -N, --rgfa-sample STR give all rGFA cover fragments sample name (path prefix) STR (default: _rGFA_)." << endl
<< " -s, --snarls FILE snarls (from vg snarls) to avoid recomputing. snarls only used for rgfa cover (-R)." << endl
<< " -t, --threads N use up to N threads when computing rGFA covoer (default: all available)" << endl
<< " -t, --threads N use up to N threads when computing rGFA cover (default: all available)" << endl
<< " output path data:" << endl
<< " -X, --extract-gam print (as GAM alignments) the stored paths in the graph" << endl
<< " -A, --extract-gaf print (as GAF alignments) the stored paths in the graph" << endl
Expand Down Expand Up @@ -107,6 +108,7 @@ int main_paths(int argc, char** argv) {
bool drop_paths = false;
bool retain_paths = false;
int64_t rgfa_min_len = -1;
string rgfa_sample_name = "_rGFA_";
string snarl_filename;
string graph_file;
string gbwt_file;
Expand Down Expand Up @@ -141,6 +143,7 @@ int main_paths(int argc, char** argv) {
{"drop-paths", no_argument, 0, 'd'},
{"retain-paths", no_argument, 0, 'r'},
{"rgfa-cover", required_argument, 0, 'R'},
{"rgfa-sample", required_argument, 0, 'N'},
{"snarls", required_argument, 0, 's'},
{"extract-gam", no_argument, 0, 'X'},
{"extract-gaf", no_argument, 0, 'A'},
Expand All @@ -163,7 +166,7 @@ int main_paths(int argc, char** argv) {
};

int option_index = 0;
c = getopt_long (argc, argv, "hLXv:x:g:Q:VEMCFAS:Tq:drR:s:aGp:ct:",
c = getopt_long (argc, argv, "hLXv:x:g:Q:VEMCFAS:Tq:drR:N:s:aGp:ct:",
long_options, &option_index);

// Detect the end of the options.
Expand Down Expand Up @@ -204,6 +207,10 @@ int main_paths(int argc, char** argv) {
output_formats++;
break;

case 'N':
rgfa_sample_name = optarg;
break;

case 's':
snarl_filename = optarg;
break;
Expand Down Expand Up @@ -622,7 +629,7 @@ int main_paths(int argc, char** argv) {
}
});

rgfa_graph_cover(mutable_graph, snarl_manager.get(), reference_paths, rgfa_min_len);
rgfa_graph_cover(mutable_graph, snarl_manager.get(), reference_paths, rgfa_min_len, rgfa_sample_name);
}

// output the graph
Expand Down

0 comments on commit 2b38d5a

Please sign in to comment.