Skip to content
This repository has been archived by the owner on Oct 13, 2022. It is now read-only.

Commit

Permalink
Partial implementation of analyzeMode3Subgraph.
Browse files Browse the repository at this point in the history
  • Loading branch information
paoloczi committed May 9, 2022
1 parent 16074d5 commit e1fd5a3
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 0 deletions.
11 changes: 11 additions & 0 deletions scripts/AnalyzeMode3Subgraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/python3

import shasta

segmentIds = [200, 300, 400]

a = shasta.Assembler()
a.accessMode3AssemblyGraph()
a.analyzeMode3Subgraph(segmentIds)


1 change: 1 addition & 0 deletions src/Assembler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2417,6 +2417,7 @@ class shasta::Assembler :
size_t threadCount);
shared_ptr<mode3::AssemblyGraph> assemblyGraph3Pointer;
void accessMode3AssemblyGraph();
void analyzeMode3Subgraph(const vector<uint64_t>& segmentIds);



Expand Down
6 changes: 6 additions & 0 deletions src/AssemblerMode3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,9 @@ void Assembler::accessMode3AssemblyGraph()
assemblyGraph3Pointer = std::make_shared<mode3::AssemblyGraph>(largeDataFileNamePrefix, markers, markerGraph);
}



void Assembler::analyzeMode3Subgraph(const vector<uint64_t>& segmentIds)
{
assemblyGraph3Pointer->analyzeSubgraph(segmentIds);
}
4 changes: 4 additions & 0 deletions src/PythonModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,10 @@ PYBIND11_MODULE(shasta, shastaModule)
.def("mode3Assembly",
&Assembler::mode3Assembly,
arg("threadCount") = 0)
.def("accessMode3AssemblyGraph",
&Assembler::accessMode3AssemblyGraph)
.def("analyzeMode3Subgraph",
&Assembler::analyzeMode3Subgraph)



Expand Down
94 changes: 94 additions & 0 deletions src/mode3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,71 @@ void AssemblyGraph::computeCompressedPseudoPath(



void AssemblyGraph::computeSegmentCompressedPseudoPathInfo()
{
const bool debug = true;

const uint64_t segmentCount = paths.size();
const uint64_t readCount = compressedPseudoPaths.size()/2;

createNew(segmentCompressedPseudoPathInfo, "Mode3-SegmentCompressedPseudoPathInfo");

// Pass 1.
segmentCompressedPseudoPathInfo.beginPass1(segmentCount);
for(ReadId readId=0; readId<readCount; readId++) {
for(Strand strand=0; strand<2; strand++) {
const OrientedReadId orientedReadId(readId, strand);
const auto compressedPseudoPath = compressedPseudoPaths[orientedReadId.getValue()];

for(uint64_t position=0; position<compressedPseudoPath.size(); position++) {
const CompressedPseudoPathEntry& compressedPseudoPathEntry =
compressedPseudoPath[position];
const uint64_t segmentId = compressedPseudoPathEntry.segmentId;
segmentCompressedPseudoPathInfo.incrementCount(segmentId);
}
}
}

// Pass 2.
segmentCompressedPseudoPathInfo.beginPass2();
for(ReadId readId=0; readId<readCount; readId++) {
for(Strand strand=0; strand<2; strand++) {
const OrientedReadId orientedReadId(readId, strand);
const auto compressedPseudoPath = compressedPseudoPaths[orientedReadId.getValue()];

for(uint64_t position=0; position<compressedPseudoPath.size(); position++) {
const CompressedPseudoPathEntry& compressedPseudoPathEntry =
compressedPseudoPath[position];
const uint64_t segmentId = compressedPseudoPathEntry.segmentId;
segmentCompressedPseudoPathInfo.store(segmentId, make_pair(orientedReadId, position));
}
}
}
segmentCompressedPseudoPathInfo.endPass2();

// Sort.
for(uint64_t segmentId=0; segmentId<segmentCount; segmentId++) {
const auto v = segmentCompressedPseudoPathInfo[segmentId];
sort(v.begin(), v.end());
}


if(debug) {
ofstream csv("SegmentCompressedPseudoPathInfo.csv");
csv << "SegmentId,OrientedReadId,Position in compressed pseudopath\n";
for(uint64_t segmentId=0; segmentId<segmentCount; segmentId++) {
const auto v = segmentCompressedPseudoPathInfo[segmentId];
for(const auto& p: v) {
csv << segmentId << ",";
csv << p.first << ",";
csv << p.second << "\n";
}
}
}
}



void AssemblyGraph::findTransitions(std::map<SegmentPair, Transitions>& transitionMap)
{
transitionMap.clear();
Expand Down Expand Up @@ -522,6 +587,7 @@ AssemblyGraph::AssemblyGraph(
computePseudoPaths(threadCount);
computeCompressedPseudoPaths();
pseudoPaths.remove();
computeSegmentCompressedPseudoPathInfo();

// Find pseudopath transitions and store them keyed by the pair of segments.
std::map<SegmentPair, Transitions> transitionMap;
Expand Down Expand Up @@ -562,6 +628,7 @@ AssemblyGraph::AssemblyGraph(
accessExistingReadOnly(segmentCoverage, "Mode3-SegmentCoverage");
accessExistingReadOnly(markerGraphEdgeTable, "Mode3-MarkerGraphEdgeTable");
accessExistingReadOnly(compressedPseudoPaths, "Mode3-CompressedPseudoPaths");
accessExistingReadOnly(segmentCompressedPseudoPathInfo, "Mode3-SegmentCompressedPseudoPathInfo");
accessExistingReadOnly(links, "Mode3-Links");
accessExistingReadOnly(transitions, "Mode3-Transitions");
accessExistingReadOnly(linksBySource, "Mode3-LinksBySource");
Expand Down Expand Up @@ -1203,4 +1270,31 @@ void AssemblyGraph::findDescendants(



// Analyze a subgraph of the assembly graph.
void AssemblyGraph::analyzeSubgraph(const vector<uint64_t>& segmentIds) const
{
// Gather triplets (orientedReadId, position in compressedPseudoPath, segmentId).
using Triplet = tuple<OrientedReadId, uint64_t, uint64_t>;
vector<Triplet> triplets;
for(const uint64_t segmentId: segmentIds) {
const auto v = segmentCompressedPseudoPathInfo[segmentId];
for(const auto& p: v) {
const OrientedReadId orientedReadId = p.first;
const uint64_t position = p.second;
triplets.push_back(Triplet(orientedReadId, position, segmentId));
}
}
sort(triplets.begin(), triplets.end());

// Write the triplets.
{
ofstream csv("Triplets.csv");
for(const Triplet& triplet: triplets) {
csv << get<0>(triplet) << ",";
csv << get<1>(triplet) << ",";
csv << get<2>(triplet) << "\n";
}
}

}

12 changes: 12 additions & 0 deletions src/mode3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ class shasta::mode3::AssemblyGraph :

// The compressed pseudopath of an oriented read
// is the sequence of segmentIds it encounters.
// It stores only the first and last PseudoPathEntry
// on each segment.
// Note a segmentId can appear more than once on the compressed
// pseudopath of an oriented read.
class CompressedPseudoPathEntry {
Expand All @@ -154,6 +156,11 @@ class shasta::mode3::AssemblyGraph :
const span<PseudoPathEntry> pseudoPath,
vector<CompressedPseudoPathEntry>& compressedPseudoPath);

// Store appearances of segments in compressed pseudopaths.
// For each segment, store pairs (orientedReadId, position in compressed pseudo path).
MemoryMapped::VectorOfVectors<pair<OrientedReadId, uint64_t>, uint64_t> segmentCompressedPseudoPathInfo;
void computeSegmentCompressedPseudoPathInfo();



// A pseudopath transition occurs when the pseudopath of an oriented read
Expand Down Expand Up @@ -361,6 +368,11 @@ class shasta::mode3::AssemblyGraph :



// Analyze a subgraph of the assembly graph.
void analyzeSubgraph(const vector<uint64_t>& segmentIds) const;



// Compute link separation given a set of Transitions.
template<class Container> static double linkSeparation(
const Container& transitions,
Expand Down

0 comments on commit e1fd5a3

Please sign in to comment.