-
Notifications
You must be signed in to change notification settings - Fork 1
/
embeddedGraph.hpp
125 lines (99 loc) · 4.04 KB
/
embeddedGraph.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#ifndef COREGRAPH_EMBEDDEDGRAPH_HPP
#define COREGRAPH_EMBEDDEDGRAPH_HPP
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <list>
#include <map>
#include <string>
#include <tuple>
#include <utility>

#include "ekg/vg/vg.hpp"
#include "ekg/vg/index.hpp"
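// The pinch graph library header is pulled in inside an extern "C" block so
// that its declarations get C linkage and are not name-mangled as C++ symbols.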
extern "C" {
#include "benedictpaten/pinchesAndCacti/inc/stPinchGraphs.h"
}
namespace coregraph {
/**
* Represents a vg graph that has been embedded in a pinch graph, as a series
* of pinched-together threads.
*/
class EmbeddedGraph {
public:
/**
* Construct an embedding of the given graph in the given thread set. Needs
* a place to deposit the sequences for the new threads it creates, and a
* function that can produce unique novel sequence names. Optionally, a
* string name can be given to the graph, although the passed string does
* not need to outlive the graph (as it is copied).
*/
EmbeddedGraph(vg::VG& graph, stPinchThreadSet* threadSet, std::map<int64_t, std::string>& threadSequences,
std::function<int64_t(void)> getId, const std::string& name="");
/**
* Trace out common paths between this embedded graph and the other graph
* embedded in the same stPinchThreadSet and pinch together.
*/
void pinchWith(EmbeddedGraph& other);
/**
* Merge this embedded graph with another on shared unique kmers. Takes two
* indexes. kmerSize gives the length of kmers to look for/generate, and
* edgeMax gives the max number of choice points in a kmer's kpath.
*/
void pinchOnKmers(vg::Index& ourIndex, EmbeddedGraph& other, vg::Index& theirIndex,
size_t kmerSize=1, size_t edgeMax=0);
/**
* Compute whether this graph is covered by paths, or whether any nodes
* exist that aren't on some path.
*/
bool isCoveredByPaths();
/**
* Return the name of the graph.
*/
const std::string& getName();
/**
* Convert a pinch thread set to a VG graph, broken up into several protobuf
* Graph objects, of suitable size for serialization. Graph objects are
* streamed out through the callback.
*/
static void threadSetToGraphs(stPinchThreadSet* threadSet, std::function<void(vg::Graph)> callback);
protected:
/**
* Scan along a path, and ensure that it is all perfect mappings. Returns
* the total length. The passed path must be part of this graph, not another
* graph.
*/
size_t scanPath(std::list<vg::Mapping>& path);
/**
* Pinch this graph witht he other graph along two corresponding paths.
*/
void pinchOnPaths(std::list<vg::Mapping>& path, EmbeddedGraph& other,
std::list<vg::Mapping>& otherPath);
/**
* Turn a kmer that starts at a certain position along a kpath into a list
* of Mappings covering only the bases in the kmer.
*/
static std::list<vg::Mapping> makeMinimalPath(std::string& kmer,
std::list<vg::NodeTraversal>::iterator occurrence, int offset,
std::list<vg::NodeTraversal>& path);
/**
* Reverse a path in this graph, to spell out the path of the reverse
* complement sequence.
*/
std::list<vg::Mapping> reverse_path(std::list<vg::Mapping> path);
/**
* Return true if the two given paths, generated by makeMinimalPath(), are
* equal, and false otherwise.
*/
static bool paths_equal(std::list<vg::Mapping>& path1, std::list<vg::Mapping>& path2);
// The graph we came from (which keeps track of the path data)
vg::VG& graph;
// The thread set that the graph is embedded in.
stPinchThreadSet* threadSet;
// The embedding, mapping from xg node ID to thread, start base,
// is reverse
std::map<int64_t, std::tuple<stPinchThread*, int64_t, bool>> embedding;
// This is the name we carry around. We keep our own copy.
std::string name;
// How many bytes can there be in the RocksDB estimate of a kmer size for us
// to get all the occurrences and actually count them, to see if it's
// unique?
const static int MAX_UNIQUE_KMER_BYTES = 512;
};
}
#endif