Skip to content

Commit

Permalink
Implement allocation tracking
Browse files Browse the repository at this point in the history
  • Loading branch information
0x002A committed Oct 28, 2021
1 parent f7748ec commit 3a25612
Show file tree
Hide file tree
Showing 15 changed files with 435 additions and 56 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ add_subdirectory("lib${LIBNAME}")
file(GLOB_RECURSE SOURCES RELATIVE ${CMAKE_SOURCE_DIR} "src/*.cpp")
add_executable(${TARGET} ${SOURCES})

# Opt-in build switch (default OFF). When enabled, the TRACK_MEMORY_USAGE preprocessor
# definition is added to the ${TARGET} executable only (PRIVATE scope).
# NOTE(review): presumably the sources test this macro to compile in the allocation
# tracking -- confirm against the code that consumes it.
option(ENABLE_TRACK_MEMORY_USAGE "Enable tracking of memory allocations" OFF)
if (ENABLE_TRACK_MEMORY_USAGE)
target_compile_definitions(${TARGET} PRIVATE TRACK_MEMORY_USAGE)
endif ()

option(ENABLE_BUILD_FAT_EXE "Enable linking dependencies statically" OFF)
if (ENABLE_BUILD_FAT_EXE)
target_link_options(${TARGET} PRIVATE -static)
Expand Down
50 changes: 50 additions & 0 deletions include/ms/IO.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// -*- C++ -*-
//===---------------------------------------------------------------------------------------------------------------==//
//
// Copyright (C) 2021 Kevin Klein
// This file is part of MuCHSALSA <https://github.com/0x002A/MuCHSALSA>.
//
// MuCHSALSA is free software: you can redistribute it and/or modify it under the terms of the GNU General
// Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any
// later version.
//
// MuCHSALSA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with MuCHSALSA.
// If not, see <http://www.gnu.org/licenses/>.
//
// SPDX-License-Identifier: GPL-3.0-or-later
//
//===---------------------------------------------------------------------------------------------------------------==//

#ifndef INCLUDED_MUCHSALSA_IO
#define INCLUDED_MUCHSALSA_IO

#pragma once

#include <cstddef>
#include <cstdint>
#include <cstdio>

namespace muchsalsa {

// =====================================================================================================================
// FREE FUNCTIONS
// =====================================================================================================================

/**
 * Reads an entire line from a file, storing the address of the buffer containing the text using the supplied pointer.
 * The buffer will be null-terminated and will contain the newline character, if one was found.
 *
 * Buffer handling: if \p *pPtrBuffer is null or \p *pSizeBuffer is 0, a buffer is allocated with ::operator new and its
 * address and size are stored through the two pointers. If the line does not fit, the buffer is replaced by a larger
 * one (the size is doubled) and both \p *pPtrBuffer and \p *pSizeBuffer are updated. The same buffer can be passed to
 * subsequent calls to be reused. The caller owns the buffer and has to release it with ::operator delete.
 *
 * @param pPtrBuffer a pointer to a pointer receiving the buffer address (in/out)
 * @param pSizeBuffer a pointer to a std::size_t representing the size of the supplied buffer in bytes (in/out)
 * @param pFile a pointer to a FILE handle required for accessing the file
 * @return The number of characters read (including the newline character, excluding the terminating null character)
 *         or -1 in the event of a failure, which includes reaching the end of the file without reading any character
 */
int64_t readline(char **pPtrBuffer, std::size_t *pSizeBuffer, std::FILE *pFile);

} // namespace muchsalsa

#endif // INCLUDED_MUCHSALSA_IO
2 changes: 1 addition & 1 deletion include/ms/Util.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include <type_traits>
#include <utility>

#define LB_UNUSED(x) (void)(x)
#define MS_UNUSED(x) (void)(x)

namespace muchsalsa::util {

Expand Down
2 changes: 1 addition & 1 deletion include/ms/graph/Graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -957,7 +957,7 @@ auto GraphUtil::getShortestPath(
}

for (auto const &[neighbor, pEdge] : _getReachableVertices(*pGraph, pVertex)) {
LB_UNUSED(pEdge);
MS_UNUSED(pEdge);
auto const *const pNeighbor = pGraph->getVertex(neighbor);

auto const distNeighbor = distances[pVertex] + 1;
Expand Down
27 changes: 12 additions & 15 deletions libms/src/BlastFileAccessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@

#include "BlastFileAccessor.h"

#include <cstdlib>
#include <gsl/pointers>

#include "IO.h"

namespace muchsalsa {

// =====================================================================================================================
Expand Down Expand Up @@ -51,12 +52,12 @@ std::string BlastFileAccessor::getLine(int64_t const &offset) {

std::fseek(m_pBlastFile.get(), offset, SEEK_SET);

char * pLine = nullptr;
std::size_t bufferSize = 0;
auto ret = getline(&pLine, &bufferSize, m_pBlastFile.get());
char *pLine = nullptr;
std::size_t sizeBuffer = 0;
auto ret = readline(&pLine, &sizeBuffer, m_pBlastFile.get());

auto result = [=]() {
if (ret != -1 && bufferSize > 0) {
if (ret != -1 && sizeBuffer > 0) {
auto line = std::string(pLine);
line.pop_back();

Expand All @@ -66,31 +67,27 @@ std::string BlastFileAccessor::getLine(int64_t const &offset) {
return std::string();
}();

if (pLine) {
std::free(pLine); // NOLINT
}
operator delete(pLine); // NOLINT

return result;
}

// PRIVATE CLASS METHODS

void BlastFileAccessor::_buildIndex() {
char * pLine = nullptr;
std::size_t bufferSize = 0;
char *pLine = nullptr;
std::size_t sizeBuffer = 0;
auto offset = std::ftell(m_pBlastFile.get());
auto ret = getline(&pLine, &bufferSize, m_pBlastFile.get());
auto ret = readline(&pLine, &sizeBuffer, m_pBlastFile.get());

while (ret != -1) {
m_offsets.push_back(offset);

offset = std::ftell(m_pBlastFile.get());
ret = getline(&pLine, &bufferSize, m_pBlastFile.get());
ret = readline(&pLine, &sizeBuffer, m_pBlastFile.get());
}

if (pLine) {
std::free(pLine); // NOLINT
}
operator delete(pLine); // NOLINT
}

} // namespace muchsalsa
Expand Down
101 changes: 101 additions & 0 deletions libms/src/IO.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// -*- C++ -*-
//===---------------------------------------------------------------------------------------------------------------==//
//
// Copyright (C) 2021 Kevin Klein
// This file is part of MuCHSALSA <https://github.com/0x002A/MuCHSALSA>.
//
// MuCHSALSA is free software: you can redistribute it and/or modify it under the terms of the GNU General
// Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any
// later version.
//
// MuCHSALSA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General Public License along with MuCHSALSA.
// If not, see <http://www.gnu.org/licenses/>.
//
// SPDX-License-Identifier: GPL-3.0-or-later
//
//===---------------------------------------------------------------------------------------------------------------==//

#include "IO.h"

#include <cstring>

namespace muchsalsa {

// =====================================================================================================================
//                                                        HELPER
// =====================================================================================================================

namespace {

/**
 * Replaces a block obtained from ::operator new by a freshly allocated block of a different size.
 *
 * The first \p sizeOld bytes (at most \p sizeNew) are copied into the new block and the old block is released with
 * ::operator delete. If the function returns nullptr the old block has not been touched and is still owned by the
 * caller.
 *
 * @param pOldMem a pointer to the block to be replaced, must have been allocated using ::operator new
 * @param sizeOld the number of bytes of the old block which should be preserved
 * @param sizeNew the size of the new block in bytes
 * @return A pointer to the new block or nullptr if \p pOldMem is null or the allocation yielded no memory
 */
void *realloc(void *pOldMem, std::size_t sizeOld, std::size_t sizeNew) {
  // Validate the input before allocating, otherwise the new block would leak on the early return.
  if (!pOldMem) {
    return nullptr;
  }

  auto *pNewMem = ::operator new(sizeNew);
  // The default ::operator new throws instead of returning null; the check guards against replaced allocators.
  if (!pNewMem) {
    return nullptr;
  }

  // Never copy more than the new block is able to hold.
  std::memcpy(pNewMem, pOldMem, sizeOld < sizeNew ? sizeOld : sizeNew);

  ::operator delete(pOldMem);

  return pNewMem;
}

} // unnamed namespace

// =====================================================================================================================
//                                                    FREE FUNCTIONS
// =====================================================================================================================

/**
 * Reads one line from \p pFile into the buffer referenced by \p pPtrBuffer.
 *
 * If no usable buffer is supplied (null pointer or size 0) a buffer of 120 bytes is allocated using ::operator new.
 * Whenever the next character plus the terminating null character would not fit, the buffer is replaced by one of
 * twice the size. \p *pPtrBuffer and \p *pSizeBuffer always describe the buffer currently in use, which is owned by
 * the caller and has to be released with ::operator delete.
 *
 * @param pPtrBuffer a pointer to a pointer receiving the buffer address
 * @param pSizeBuffer a pointer to a std::size_t representing the size of the supplied buffer
 * @param pFile a pointer to a FILE handle required for accessing the file
 * @return The number of characters read (including the newline character, if any) or -1 in the event of a failure
 */
int64_t readline(char **pPtrBuffer, std::size_t *pSizeBuffer, std::FILE *pFile) {
  if (*pPtrBuffer == nullptr || *pSizeBuffer == 0) {
    // A non-null buffer of size 0 is unusable; release it instead of leaking it (deleting nullptr is a no-op).
    ::operator delete(*pPtrBuffer);

    *pSizeBuffer = 120;

    *pPtrBuffer = static_cast<char *>(::operator new(*pSizeBuffer));
    if (*pPtrBuffer == nullptr) {
      return -1;
    }
  }

  // Invariant: charsInBuffer + 1 <= *pSizeBuffer, i.e. there is always room left for the terminating null character.
  std::size_t charsInBuffer = 0;

  for (auto c = std::fgetc(pFile); c != EOF; c = std::fgetc(pFile)) {
    // Make sure the character AND the terminating null character fit BEFORE writing anything. Due to the invariant
    // a single doubling is always sufficient.
    if (charsInBuffer + 2 > *pSizeBuffer) {
      auto const sizeNewBuffer = *pSizeBuffer * 2;

      auto *const pNewBuffer = static_cast<char *>(realloc(*pPtrBuffer, charsInBuffer, sizeNewBuffer));
      if (!pNewBuffer) {
        // The old buffer is still valid and referenced by *pPtrBuffer, so the caller is able to release it.
        return -1;
      }

      *pPtrBuffer  = pNewBuffer;
      *pSizeBuffer = sizeNewBuffer;
    }

    (*pPtrBuffer)[charsInBuffer++] = static_cast<char>(c); // NOLINT

    if (c == '\n') {
      (*pPtrBuffer)[charsInBuffer] = '\0'; // NOLINT
      return static_cast<int64_t>(charsInBuffer);
    }
  }

  // A last line without a trailing newline is still a line; a read error or an empty read is reported as failure.
  if (std::feof(pFile) && charsInBuffer != 0) {
    (*pPtrBuffer)[charsInBuffer] = '\0'; // NOLINT
    return static_cast<int64_t>(charsInBuffer);
  }

  return -1;
}

} // namespace muchsalsa

// ---------------------------------------------------- END-OF-FILE ----------------------------------------------------
37 changes: 16 additions & 21 deletions libms/src/SequenceAccessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <span>
#include <unordered_map>
#include <utility>
#include <vector>

#include "IO.h"
#include "Registry.h"
#include "Util.h"
#include "threading/Job.h"
Expand Down Expand Up @@ -145,28 +144,28 @@ void SequenceAccessor::_buildNanoporeIdx(gsl::not_null<threading::Job const *> p
auto const identifierDescline = m_nanoporeFileIsFastQ ? FASTQ_IDENTIFIER_DESCLINE : FASTA_IDENTIFIER_DESCLINE;
auto const identifierSplitline = m_nanoporeFileIsFastQ ? FASTQ_IDENTIFIER_SPLITLINE : FASTA_IDENTIFIER_DESCLINE;

char * pLine = nullptr;
std::size_t bufferSize = 0;
auto ret = getline(&pLine, &bufferSize, m_pNanoporeSequenceFile.get());
char *pLine = nullptr;
std::size_t sizeBuffer = 0;
auto ret = readline(&pLine, &sizeBuffer, m_pNanoporeSequenceFile.get());
auto offsetStart = std::ftell(m_pNanoporeSequenceFile.get());
while (ret != -1) {
if (*pLine == identifierDescline) {
break;
}

ret = getline(&pLine, &bufferSize, m_pNanoporeSequenceFile.get());
ret = readline(&pLine, &sizeBuffer, m_pNanoporeSequenceFile.get());
offsetStart = std::ftell(m_pNanoporeSequenceFile.get());
}

while (*pLine == identifierDescline) {
std::span const spanLine(pLine, bufferSize);
std::span const spanLine(pLine, sizeBuffer);
auto sequenceId = std::string(spanLine.subspan(1).data());
cleanSequenceId(sequenceId);

auto lengthCurrentSequence = 0L;

while (true) {
ret = getline(&pLine, &bufferSize, m_pNanoporeSequenceFile.get());
ret = readline(&pLine, &sizeBuffer, m_pNanoporeSequenceFile.get());
auto offsetEnd = std::ftell(m_pNanoporeSequenceFile.get());

if (ret == -1 || *pLine == identifierSplitline) {
Expand All @@ -180,41 +179,39 @@ void SequenceAccessor::_buildNanoporeIdx(gsl::not_null<threading::Job const *> p
}

while (ret != -1 && *pLine != identifierDescline) {
ret = getline(&pLine, &bufferSize, m_pNanoporeSequenceFile.get());
ret = readline(&pLine, &sizeBuffer, m_pNanoporeSequenceFile.get());
offsetStart = std::ftell(m_pNanoporeSequenceFile.get());
}
}

if (pLine) {
std::free(pLine); // NOLINT
}
operator delete(pLine); // NOLINT

std::any_cast<threading::WaitGroup *>(pJob->getParam(0))->done();
}

void SequenceAccessor::_buildIlluminaIdx(gsl::not_null<threading::Job const *> pJob) {
char * pLine = nullptr;
std::size_t bufferSize = 0;
auto ret = getline(&pLine, &bufferSize, m_pIlluminaSequenceFile.get());
char *pLine = nullptr;
std::size_t sizeBuffer = 0;
auto ret = readline(&pLine, &sizeBuffer, m_pIlluminaSequenceFile.get());
auto offsetStart = std::ftell(m_pIlluminaSequenceFile.get());
while (ret != -1) {
if (*pLine == FASTA_IDENTIFIER_DESCLINE) {
break;
}

ret = getline(&pLine, &bufferSize, m_pIlluminaSequenceFile.get());
ret = readline(&pLine, &sizeBuffer, m_pIlluminaSequenceFile.get());
offsetStart = std::ftell(m_pIlluminaSequenceFile.get());
}

while (*pLine == FASTA_IDENTIFIER_DESCLINE) {
std::span const spanLine(pLine, bufferSize);
std::span const spanLine(pLine, sizeBuffer);
auto sequenceId = std::string(spanLine.subspan(1).data());
cleanSequenceId(sequenceId);

auto lengthCurrentSequence = 0L;

while (true) {
ret = getline(&pLine, &bufferSize, m_pIlluminaSequenceFile.get());
ret = readline(&pLine, &sizeBuffer, m_pIlluminaSequenceFile.get());
auto offsetEnd = std::ftell(m_pIlluminaSequenceFile.get());

if (ret == -1 || *pLine == FASTA_IDENTIFIER_DESCLINE) {
Expand All @@ -228,9 +225,7 @@ void SequenceAccessor::_buildIlluminaIdx(gsl::not_null<threading::Job const *> p
}
}

if (pLine) {
std::free(pLine); // NOLINT
}
operator delete(pLine); // NOLINT

std::any_cast<threading::WaitGroup *>(pJob->getParam(0))->done();
}
Expand Down
4 changes: 2 additions & 2 deletions libms/src/graph/Graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ void GraphBase::_deleteVertex(gsl::not_null<Vertex const *> const pVertex, bool

if (!hasBidirectionalEdges) {
for (auto &[targetId, connectedVertices] : m_adjacencyList) {
LB_UNUSED(targetId);
MS_UNUSED(targetId);

auto const iterPredecessor = connectedVertices.find(pVertex->getId());
if (iterPredecessor != std::end(connectedVertices)) {
Expand Down Expand Up @@ -376,7 +376,7 @@ std::vector<muchsalsa::graph::Vertex const *> DiGraph::sortTopologically() const

auto const successors = getSuccessors(pVertex);
for (auto const &[targetId, pEdge] : successors) {
LB_UNUSED(pEdge);
MS_UNUSED(pEdge);

auto const *pSuccessor = getVertex(targetId);

Expand Down
Loading

0 comments on commit 3a25612

Please sign in to comment.