Skip to content

Commit

Permalink
adapt rgfa tests to latest conventions
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed May 11, 2023
1 parent 2b38d5a commit 5ee7304
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 30 deletions.
18 changes: 5 additions & 13 deletions src/gfa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ static bool should_write_as_w_line(const PathHandleGraph* graph, path_handle_t p
static void write_w_line(const PathHandleGraph* graph, ostream& out, path_handle_t path_handle, unordered_map<tuple<string, int64_t, string>, size_t>& last_phase_block_end);

void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set<string>& rgfa_paths,
bool rgfa_pline, bool use_w_lines, const string& rgfa_sample_name) {
bool rgfa_pline, bool use_w_lines) {

// TODO: Support sorting nodes, paths, and/or edges for canonical output
// TODO: Use a NamedNodeBackTranslation (or forward translation?) to properly round-trip GFA that has had to be chopped.

Expand Down Expand Up @@ -118,17 +118,13 @@ void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set<string>&

vector<path_handle_t> w_line_paths;

bool warned_about_tags_as_paths = false;
// Paths as P-lines
for (const path_handle_t& h : path_handles) {
auto path_name = graph->get_path_name(h);
if (get_rgfa_rank(path_name) > 0) {
if (!rgfa_paths.empty()) {
// the path was put into tags, no reason to deal with it here
continue;
} else if (!warned_about_tags_as_paths) {
cerr << "warning [gfa]: outputing rGFA cover (rank>=1) path(s) as a P-line(s) and not tags because no reference (rank==0) selected" << endl;
warned_about_tags_as_paths = true;
}
}
if (rgfa_pline || !rgfa_paths.count(path_name)) {
Expand Down Expand Up @@ -171,9 +167,6 @@ void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set<string>&
if (!rgfa_paths.empty()) {
// the path was put into tags, no reason to deal with it here
continue;
} else if (!warned_about_tags_as_paths) {
cerr << "warning [gfa]: outputing rGFA cover (rank>=1) path(s) as a W-line(s) and not tags because no reference (rank==0) selected" << endl;
warned_about_tags_as_paths = true;
}
}
write_w_line(graph, out, h, last_phase_block_end);
Expand Down Expand Up @@ -282,7 +275,7 @@ void write_w_line(const PathHandleGraph* graph, ostream& out, path_handle_t path
out << "\n";
}

int get_rgfa_rank(const string& path_name, const string& rgfa_sample) {
int get_rgfa_rank(const string& path_name) {
int rank = -1;

PathSense path_sense;
Expand All @@ -296,7 +289,6 @@ int get_rgfa_rank(const string& path_name, const string& rgfa_sample) {

size_t pos = path_locus.rfind(":SR:i:");
if (pos != string::npos && path_locus.length() - pos >= 6) {
assert(path_sample == rgfa_sample);
pos += 6;
size_t pos2 = path_locus.find(":", pos);
size_t len = pos2 == string::npos ? pos2 : pos2 - pos;
Expand Down Expand Up @@ -399,10 +391,10 @@ void rgfa_graph_cover(MutablePathMutableHandleGraph* graph,
size_t thread_count = get_thread_count();
vector<vector<pair<int64_t, vector<step_handle_t>>>> thread_covers(thread_count);

// we process top-level snarl_manager in parallel
// we process top-level snarls in parallel
snarl_manager->for_each_top_level_snarl_parallel([&](const Snarl* snarl) {
// per-thread output
// each fragment is a rank and vector of steps, the cove is a list of fragments
// each fragment is a rank and vector of steps, the cover is a list of fragments
// TODO: we can store just a first step and count instead of every fragment
vector<pair<int64_t, vector<step_handle_t>>>& cover_fragments = thread_covers.at(omp_get_thread_num());
// we also index the fragments by their node ids for fast lookups of what's covered by what
Expand Down
5 changes: 2 additions & 3 deletions src/gfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ using namespace std;
void graph_to_gfa(const PathHandleGraph* graph, ostream& out,
const set<string>& rgfa_paths = {},
bool rgfa_pline = false,
bool use_w_lines = true,
const string& rgfa_sample_name = "");
bool use_w_lines = true);


/// Prototype code to tag paths as rGFA paths. Either needs to be completely scrapped
Expand All @@ -48,7 +47,7 @@ void graph_to_gfa(const PathHandleGraph* graph, ostream& out,
/// Returns the RGFA rank (SR) of a path. This will be 0 for the reference
/// backbone, and higher the further number of (nested) bubbles away it is.
/// If the path is not an RGFA path, then return -1
int get_rgfa_rank(const string& path_name, const string& rgfa_sample="_rGFA_");
int get_rgfa_rank(const string& path_name);

/// Add the rgfa rank to a pathname, also setting its sample to the special rgfa sample and
/// moving its old sample into the locus field
Expand Down
28 changes: 14 additions & 14 deletions test/t/11_vg_paths.t
Original file line number Diff line number Diff line change
Expand Up @@ -59,46 +59,46 @@ is $? 0 "vg path coverage reports correct lengths in first column"

rm -f q.vg q.cov.len q.len

vg paths -v rgfa/rgfa_tiny.gfa -R 1 -Q x | vg view - > rgfa_tiny.rgfa
printf "P y[33-34]:SR:i:1 10+ *
P y[38-39]:SR:i:1 13+ *
P y[8-10]:SR:i:1 2+,4+ *\n" > rgfa_tiny_expected_fragments.rgfa
vg paths -v rgfa/rgfa_tiny.gfa -R 1 -Q x -N c | vg convert - -fW > rgfa_tiny.rgfa
printf "P c#0#SN:Z:y:SR:i:1[33-34] 10+ *
P c#0#SN:Z:y:SR:i:1[38-39] 13+ *
P c#0#SN:Z:y:SR:i:1[8-10] 2+,4+ *\n" > rgfa_tiny_expected_fragments.rgfa
grep ^P rgfa_tiny.rgfa | grep SR | sort > rgfa_tiny_fragments.rgfa
diff rgfa_tiny_fragments.rgfa rgfa_tiny_expected_fragments.rgfa
is $? 0 "Found the expected rgfa SNP cover of tiny graph"

rm -f rgfa_tiny.rgfa rgfa_tiny_expected_fragments.rgfa rgfa_tiny_fragments.rgfa

vg paths -v rgfa/rgfa_ins.gfa -R 5 -Q x | vg view - > rgfa_ins.rgfa
printf "P z[8-17]:SR:i:1 2+,3+,4+ *\n" > rgfa_ins_expected_fragments.rgfa
vg paths -v rgfa/rgfa_ins.gfa -R 5 -Q x -N c | vg convert - -fW > rgfa_ins.rgfa
printf "P c#0#SN:Z:z:SR:i:1[8-17] 2+,3+,4+ *\n" > rgfa_ins_expected_fragments.rgfa
grep ^P rgfa_ins.rgfa | grep SR | sort > rgfa_ins_fragments.rgfa
diff rgfa_ins_fragments.rgfa rgfa_ins_expected_fragments.rgfa
is $? 0 "Found the expected rgfa cover for simple nested insertion"

rm -f rgfa_ins.rgfa rgfa_ins_expected_fragments.rgfa rgfa_ins_fragments.rgfa

vg paths -v rgfa/rgfa_ins2.gfa -R 3 -Q x | vg view - > rgfa_ins2.rgfa
printf "P y[8-24]:SR:i:1 2+,6+,4+ *
P z[11-14]:SR:i:2 3+ *\n" > rgfa_ins2_expected_fragments.rgfa
vg paths -v rgfa/rgfa_ins2.gfa -R 3 -Q x | vg convert - -fW > rgfa_ins2.rgfa
printf "P _rGFA_#0#SN:Z:y:SR:i:1[8-24] 2+,6+,4+ *
P _rGFA_#0#SN:Z:z:SR:i:2[11-14] 3+ *\n" > rgfa_ins2_expected_fragments.rgfa
grep ^P rgfa_ins2.rgfa | grep SR | sort > rgfa_ins2_fragments.rgfa
diff rgfa_ins2_fragments.rgfa rgfa_ins2_expected_fragments.rgfa
is $? 0 "Found the expected rgfa cover for simple nested insertion that requires two fragments"

rm -f rgfa_ins2.rgfa rgfa_ins2_expected_fragments.rgfa rgfa_ins2_fragments.rgfa

vg paths -v rgfa/rgfa_ins2.gfa -R 5 -Q x | vg view - > rgfa_ins2R5.rgfa
printf "P y[8-24]:SR:i:1 2+,6+,4+ *\n" > rgfa_ins2R5_expected_fragments.rgfa
vg paths -v rgfa/rgfa_ins2.gfa -R 5 -Q x -N c | vg convert - -fW > rgfa_ins2R5.rgfa
printf "P c#0#SN:Z:y:SR:i:1[8-24] 2+,6+,4+ *\n" > rgfa_ins2R5_expected_fragments.rgfa
grep ^P rgfa_ins2R5.rgfa | grep SR | sort > rgfa_ins2R5_fragments.rgfa
diff rgfa_ins2R5_fragments.rgfa rgfa_ins2R5_expected_fragments.rgfa
is $? 0 "rgfa Minimum fragment length filters out small fragment"

rm -f rgfa_ins2R5.rgfa rgfa_ins2R5_expected_fragments.rgfa rgfa_ins2R5_fragments.rgfa

vg paths -v rgfa/rgfa_ins3.gfa -R 3 -Q x | vg view - > rgfa_ins3.rgfa
vg paths -v rgfa/rgfa_ins3.gfa -R 3 -Q x -N c | vg convert - -fW > rgfa_ins3.rgfa
printf "P x 1+,5+ *
P y[3-19]:SR:i:1 4+,6+,2+ *
P c#0#SN:Z:y:SR:i:1[3-19] 4+,6+,2+ *
P y 5-,4+,6+,2+,1- *
P z[11-14]:SR:i:2 3+ *
P c#0#SN:Z:z:SR:i:2[11-14] 3+ *
P z 1+,2-,3+,4-,5+ *\n" | sort > rgfa_ins3_expected_fragments.rgfa
grep ^P rgfa_ins3.rgfa | sort > rgfa_ins3_fragments.rgfa
diff rgfa_ins3_fragments.rgfa rgfa_ins3_expected_fragments.rgfa
Expand Down

1 comment on commit 5ee7304

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch rgfa. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 10270 seconds

Please sign in to comment.