Skip to content

Commit

Permalink
Merge pull request #103 from pynbody/memmap-no-read
Browse files Browse the repository at this point in the history
Attempt to improve file writing performance on network file systems
  • Loading branch information
apontzen authored Mar 4, 2022
2 parents 7c123b5 + 1498413 commit fe7c160
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 21 deletions.
17 changes: 2 additions & 15 deletions genetIC/src/io/grafic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,12 @@ namespace io {

for (size_t i_z = 0; i_z < targetGrid.size; ++i_z) {
pb.tick();
writeBlockHeaderFooter(block_lengths, files);

std::vector<tools::MemMapRegion<float>> varMaps;
for (int m = 0; m < 9; ++m)
varMaps.push_back(files[m].getMemMap<float>(targetGrid.size2));
varMaps.push_back(files[m].getMemMapFortran<float>(targetGrid.size2));

tools::MemMapRegion<size_t> idMap = files[9].getMemMap<size_t>(targetGrid.size2);
tools::MemMapRegion<size_t> idMap = files[9].getMemMapFortran<size_t>(targetGrid.size2);

#pragma omp parallel for
for (size_t i_y = 0; i_y < targetGrid.size; ++i_y) {
Expand Down Expand Up @@ -201,22 +200,10 @@ namespace io {

}
}
writeBlockHeaderFooter(block_lengths, files);
}
iordOffset += targetGrid.size3;
}

//! \brief Emit each file's block length as a FORTRAN-style record marker (used as both header and footer of a data block)
/*!
\param block_lengths - length in bytes of the data block, one entry per file
\param files - files to receive the marker; entry i is written with block_lengths[i]
*/
void writeBlockHeaderFooter(const vector<size_t> &block_lengths, vector<tools::MemMapFileWriter> &files) const {
  // One length per file is required; the loop below indexes files by position in block_lengths
  assert(files.size() == block_lengths.size());

  size_t fileIndex = 0;
  for (const size_t lengthInBytes : block_lengths) {
    // The marker is written as an int, so the size_t length is narrowed before output
    files[fileIndex].write<int>(static_cast<int>(lengthInBytes));
    ++fileIndex;
  }
}

//! \brief Output the header for a given level of the simulation.
/*!
Expand Down
33 changes: 27 additions & 6 deletions genetIC/src/tools/memmap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace tools {
char *addr_aligned; //!< Address for the start of the current page
DataType *addr; //!< Address for data to be written to in the memory map
size_t size_bytes; //!< Size in bytes of the data to be written, plus any data since the start of the current page
std::vector<std::function<void()>> onEndCallbacks; //!< Callbacks registered through onFinish(); each is invoked after the mapping has been unmapped

/*! \brief Define a MemMapRegion for a given file descriptor, write location, and number of elements to be written
\param fd - file descriptor (-1 for errors)
Expand All @@ -34,16 +35,17 @@ namespace tools {
*/
MemMapRegion(int fd, size_t file_offset, size_t n_elements) {
size_bytes = n_elements*sizeof(DataType);
::lseek(fd, file_offset+size_bytes-1, SEEK_SET);
::write(fd,"",1);

::ftruncate(fd, file_offset+size_bytes);
::lseek(fd, file_offset+size_bytes, SEEK_SET);

size_t npage_offset = file_offset/::getpagesize(); // Number of full pages written at the current write position
size_t aligned_offset = npage_offset*::getpagesize(); // Beginning of page that the current write position is on
size_t byte_page_offset = file_offset-aligned_offset; // Distance from the beginning of the current page

size_bytes+=byte_page_offset;

addr_aligned = static_cast<char*>(::mmap(nullptr, size_bytes, PROT_READ | PROT_WRITE,
addr_aligned = static_cast<char*>(::mmap(nullptr, size_bytes, PROT_WRITE,
MAP_SHARED, fd, aligned_offset));

if(addr_aligned==MAP_FAILED)
Expand All @@ -62,9 +64,12 @@ namespace tools {
msync(addr_aligned, size_bytes, MS_ASYNC);
if(munmap(addr_aligned, size_bytes)!=0) {
// This probably indicates something has gone catastrophically wrong...
logging::entry() << "ERROR: Failed to delete the mem-map (reason: " << ::strerror(errno) << ")" << std::endl;
logging::entry(logging::level::warning) << "ERROR: Failed to delete the mem-map (reason: " << ::strerror(errno) << ")" << std::endl;
exit(1);
}
for(auto f: onEndCallbacks) {
f();
}
}
}

Expand All @@ -76,6 +81,11 @@ namespace tools {
(*this)=std::move(move);
}

//! Registers a callback to be run when this memmap is torn down
/*!
\param f - callable taking no arguments; it is appended after any callbacks already registered
*/
void onFinish(std::function<void()> f) {
  // f is taken by value so it can genuinely be moved into storage; std::move applied
  // to a const reference silently falls back to copying the callable
  this->onEndCallbacks.emplace_back(std::move(f));
}

//! Returns the data at offset from the current read/write location
DataType & operator[](size_t offset) {
return addr[offset];
Expand All @@ -86,6 +96,7 @@ namespace tools {
this->addr = move.addr;
this->addr_aligned = move.addr_aligned;
this->size_bytes = move.size_bytes;
this->onEndCallbacks = std::move(move.onEndCallbacks);
move.addr = nullptr;
move.addr_aligned = nullptr;
return (*this);
Expand Down Expand Up @@ -167,7 +178,14 @@ namespace tools {
auto getMemMap(size_t n_elements) {
// Create a mapped region for n_elements items of DataType, starting at this object's current offset
auto region = MemMapRegion<DataType>(fd,offset,n_elements);
// Advance the stored offset past the bytes covered by the new region
offset+=n_elements*sizeof(DataType);
::lseek(fd, offset, SEEK_SET);
// The callback captures `this` and reads this->offset when it runs (not the value at
// registration time), so this object has to stay alive until the returned region has finished
region.onFinish([this]() {
// leave file position as though we just finished writing this in a "normal" way
// this is important for the fortran file writing below. Note that although the
// end of the fortran block could in principle be written before the memmap is
// created, this non-sequential writing caused erratically bad performance with
// network file systems
::lseek(this->fd, this->offset, SEEK_SET);
});
return region;
}

Expand All @@ -187,7 +205,10 @@ namespace tools {

write(fortranFieldSize);
auto region = getMemMap<DataType>(n_elements);
write(fortranFieldSize);
region.onFinish([fortranFieldSize, this]() {
this->write(fortranFieldSize);
});

return region;
}

Expand Down

0 comments on commit fe7c160

Please sign in to comment.