From a22c2c47ccc96451c3bc33e62a350296a808b511 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 22 Feb 2022 22:18:35 +0000 Subject: [PATCH 1/5] Remove read access to memmap, possibly increasing efficiency especially on network file systems? --- genetIC/src/tools/memmap.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genetIC/src/tools/memmap.hpp b/genetIC/src/tools/memmap.hpp index f65a83a6..d9a1c39b 100644 --- a/genetIC/src/tools/memmap.hpp +++ b/genetIC/src/tools/memmap.hpp @@ -43,7 +43,7 @@ namespace tools { size_bytes+=byte_page_offset; - addr_aligned = static_cast(::mmap(nullptr, size_bytes, PROT_READ | PROT_WRITE, + addr_aligned = static_cast(::mmap(nullptr, size_bytes, PROT_WRITE, MAP_SHARED, fd, aligned_offset)); if(addr_aligned==MAP_FAILED) From 115b760431db32e1746c30014696c5112ae2903b Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Wed, 23 Feb 2022 22:14:07 +0000 Subject: [PATCH 2/5] Improve ordering of writes, and introduce NO_MEMMAP option Working towards improving performance on network filesystems. --- genetIC/src/io/grafic.hpp | 17 ++----------- genetIC/src/tools/memmap.hpp | 49 ++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/genetIC/src/io/grafic.hpp b/genetIC/src/io/grafic.hpp index d08a7e00..bd0fcf90 100644 --- a/genetIC/src/io/grafic.hpp +++ b/genetIC/src/io/grafic.hpp @@ -161,13 +161,12 @@ namespace io { for (size_t i_z = 0; i_z < targetGrid.size; ++i_z) { pb.tick(); - writeBlockHeaderFooter(block_lengths, files); std::vector> varMaps; for (int m = 0; m < 9; ++m) - varMaps.push_back(files[m].getMemMap(targetGrid.size2)); + varMaps.push_back(files[m].getMemMapFortran(targetGrid.size2)); - tools::MemMapRegion idMap = files[9].getMemMap(targetGrid.size2); + tools::MemMapRegion idMap = files[9].getMemMapFortran(targetGrid.size2); #pragma omp parallel for for (size_t i_y = 0; i_y < targetGrid.size; ++i_y) { @@ -201,22 +200,10 @@ namespace io { } } - writeBlockHeaderFooter(block_lengths, files); } iordOffset += targetGrid.size3; } - //! \brief Output the length in bytes of the fields, as header and footer to each data block, FORTRAN-style - /*! - \param block_lengths - lengths of blocks of data, for each file. - \param files - files to output to. - */ - void writeBlockHeaderFooter(const vector &block_lengths, vector &files) const { - assert(block_lengths.size() == files.size()); - for (size_t i = 0; i < block_lengths.size(); ++i) { - files[i].write(int(block_lengths[i])); - } - } //! \brief Output the header for a given level of the simulation. /*! diff --git a/genetIC/src/tools/memmap.hpp b/genetIC/src/tools/memmap.hpp index d9a1c39b..c1edd717 100644 --- a/genetIC/src/tools/memmap.hpp +++ b/genetIC/src/tools/memmap.hpp @@ -26,6 +26,10 @@ namespace tools { char *addr_aligned; //!< Address for the start of the current page DataType *addr; //!< Address for data to be written to in the memory map size_t size_bytes; //!< Size in bytes of the data to be written, plus any data since the start of the current page + std::vector> onEndCallbacks; //!< A callback for when the +#ifdef NO_MEMMAP + int fd; //!< File descriptor, only stored when running in NO_MEMMAP emulation mode +#endif /*! \brief Define a MemMapRegion for a given file descriptor, write location, and number of elements to be written \param fd - file descriptor (-1 for errors) @@ -34,6 +38,11 @@ namespace tools { */ MemMapRegion(int fd, size_t file_offset, size_t n_elements) { size_bytes = n_elements*sizeof(DataType); +#ifdef NO_MEMMAP + this->addr = new DataType[n_elements]; + this->fd = fd; +#else + ::lseek(fd, file_offset+size_bytes-1, SEEK_SET); ::write(fd,"",1); @@ -50,6 +59,7 @@ namespace tools { throw std::runtime_error("Failed to create a mem-map for output (reason: "+std::string(::strerror(errno))+")"); addr = reinterpret_cast(&addr_aligned[byte_page_offset]); +#endif } @@ -58,14 +68,31 @@ namespace tools { public: //! Destructor - exit with an error if we detect something has gone wrong deleting the memory map ~MemMapRegion() { +#ifdef NO_MEMMAP + if(addr!=nullptr) { + ssize_t rval = ::write(fd, reinterpret_cast(addr), size_bytes); + if (rval < 0) + logging::entry(logging::level::warning) << "Failed to write to file (reason: " << ::strerror(errno) << ")" + << std::endl; + else if (rval != size_bytes) + logging::entry(logging::level::warning) << "Kernel did not write sufficient data to the file" << std::endl; + delete addr; + addr = nullptr; + for(auto f: onEndCallbacks) { + f(); + } + + } +#else if (addr_aligned != nullptr) { msync(addr_aligned, size_bytes, MS_ASYNC); if(munmap(addr_aligned, size_bytes)!=0) { // This probably indicates something has gone catastrophically wrong... - logging::entry() << "ERROR: Failed to delete the mem-map (reason: " << ::strerror(errno) << ")" << std::endl; + logging::entry(logging::level::warning) << "ERROR: Failed to delete the mem-map (reason: " << ::strerror(errno) << ")" << std::endl; exit(1); } } +#endif } //! Disallow copying of the MemMapRegion object (to avoid two of them writing to the same block at the same time) @@ -76,6 +103,11 @@ namespace tools { (*this)=std::move(move); } + //! Sets a callback function for when this memmap is being deconstructed + void onFinish(const std::function & f) { + this->onEndCallbacks.emplace_back(std::move(f)); + } + //! Returns the data at offset from the current read/write location DataType & operator[](size_t offset) { return addr[offset]; @@ -84,8 +116,12 @@ namespace tools { //! Copy the data from another MemMapRegion and disable the old one MemMapRegion & operator=(MemMapRegion && move) noexcept { this->addr = move.addr; +#ifdef NO_MEMMAP + this->fd = move.fd; +#endif this->addr_aligned = move.addr_aligned; this->size_bytes = move.size_bytes; + this->onEndCallbacks = std::move(move.onEndCallbacks); move.addr = nullptr; move.addr_aligned = nullptr; return (*this); @@ -133,6 +169,10 @@ namespace tools { MemMapFileWriter(std::string filename) { // Open file in read/write mode, creating it if it doesn't already exist fd = ::open(filename.c_str(), O_RDWR | O_CREAT, (mode_t)0666); +#ifdef NO_MEMMAP + logging::entry(logging::level::debug) << "Caution: running with NO_MEMMAP, which emulates memmaps " + "for experimental testing" << std::endl; +#endif if(fd==-1) throw std::runtime_error("Failed to open file (reason: "+std::string(::strerror(errno))+")"); ::ftruncate(fd, 0); @@ -167,7 +207,9 @@ namespace tools { auto getMemMap(size_t n_elements) { auto region = MemMapRegion(fd,offset,n_elements); offset+=n_elements*sizeof(DataType); +#ifndef NO_MEMMAP ::lseek(fd, offset, SEEK_SET); +#endif return region; } @@ -187,7 +229,10 @@ namespace tools { write(fortranFieldSize); auto region = getMemMap(n_elements); - write(fortranFieldSize); + region.onFinish([fortranFieldSize, this]() { + this->write(fortranFieldSize); + }); + return region; } From 05653f63d5efb56831baa99675c9c88dee2420ef Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Wed, 23 Feb 2022 22:18:55 +0000 Subject: [PATCH 3/5] Fix fortran footer writing when using memmaps --- genetIC/src/tools/memmap.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/genetIC/src/tools/memmap.hpp b/genetIC/src/tools/memmap.hpp index c1edd717..0ba707e8 100644 --- a/genetIC/src/tools/memmap.hpp +++ b/genetIC/src/tools/memmap.hpp @@ -91,6 +91,9 @@ namespace tools { logging::entry(logging::level::warning) << "ERROR: Failed to delete the mem-map (reason: " << ::strerror(errno) << ")" << std::endl; exit(1); } + for(auto f: onEndCallbacks) { + f(); + } } #endif } From a7f6c77a84cb7680984d8d22101e35a509e32aaa Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Wed, 23 Feb 2022 22:32:37 +0000 Subject: [PATCH 4/5] Remove unnecessary write operation when creating memmaps --- genetIC/src/tools/memmap.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/genetIC/src/tools/memmap.hpp b/genetIC/src/tools/memmap.hpp index 0ba707e8..2a32c92c 100644 --- a/genetIC/src/tools/memmap.hpp +++ b/genetIC/src/tools/memmap.hpp @@ -43,8 +43,8 @@ namespace tools { this->fd = fd; #else - ::lseek(fd, file_offset+size_bytes-1, SEEK_SET); - ::write(fd,"",1); + ::ftruncate(fd, file_offset+size_bytes); + ::lseek(fd, file_offset+size_bytes, SEEK_SET); size_t npage_offset = file_offset/::getpagesize(); // Number of full pages written at the current write position size_t aligned_offset = npage_offset*::getpagesize(); // Beginning of page that the current write position is on From 1498413528c2de8544d07495269add13b6b8f796 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Wed, 2 Mar 2022 16:00:54 +0000 Subject: [PATCH 5/5] Strip out NO_MEMMAP option which seems not to be needed, and tidy up memmap performance enhancements --- genetIC/src/tools/memmap.hpp | 43 +++++++----------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) diff --git a/genetIC/src/tools/memmap.hpp b/genetIC/src/tools/memmap.hpp index 2a32c92c..650ca320 100644 --- a/genetIC/src/tools/memmap.hpp +++ b/genetIC/src/tools/memmap.hpp @@ -27,9 +27,6 @@ namespace tools { DataType *addr; //!< Address for data to be written to in the memory map size_t size_bytes; //!< Size in bytes of the data to be written, plus any data since the start of the current page std::vector> onEndCallbacks; //!< A callback for when the -#ifdef NO_MEMMAP - int fd; //!< File descriptor, only stored when running in NO_MEMMAP emulation mode -#endif /*! \brief Define a MemMapRegion for a given file descriptor, write location, and number of elements to be written \param fd - file descriptor (-1 for errors) @@ -38,10 +35,6 @@ namespace tools { */ MemMapRegion(int fd, size_t file_offset, size_t n_elements) { size_bytes = n_elements*sizeof(DataType); -#ifdef NO_MEMMAP - this->addr = new DataType[n_elements]; - this->fd = fd; -#else ::ftruncate(fd, file_offset+size_bytes); ::lseek(fd, file_offset+size_bytes, SEEK_SET); @@ -59,7 +52,6 @@ namespace tools { throw std::runtime_error("Failed to create a mem-map for output (reason: "+std::string(::strerror(errno))+")"); addr = reinterpret_cast(&addr_aligned[byte_page_offset]); -#endif } @@ -68,22 +60,6 @@ namespace tools { public: //! Destructor - exit with an error if we detect something has gone wrong deleting the memory map ~MemMapRegion() { -#ifdef NO_MEMMAP - if(addr!=nullptr) { - ssize_t rval = ::write(fd, reinterpret_cast(addr), size_bytes); - if (rval < 0) - logging::entry(logging::level::warning) << "Failed to write to file (reason: " << ::strerror(errno) << ")" - << std::endl; - else if (rval != size_bytes) - logging::entry(logging::level::warning) << "Kernel did not write sufficient data to the file" << std::endl; - delete addr; - addr = nullptr; - for(auto f: onEndCallbacks) { - f(); - } - - } -#else if (addr_aligned != nullptr) { msync(addr_aligned, size_bytes, MS_ASYNC); if(munmap(addr_aligned, size_bytes)!=0) { @@ -95,7 +71,6 @@ namespace tools { f(); } } -#endif } //! Disallow copying of the MemMapRegion object (to avoid two of them writing to the same block at the same time) @@ -119,9 +94,6 @@ namespace tools { //! Copy the data from another MemMapRegion and disable the old one MemMapRegion & operator=(MemMapRegion && move) noexcept { this->addr = move.addr; -#ifdef NO_MEMMAP - this->fd = move.fd; -#endif this->addr_aligned = move.addr_aligned; this->size_bytes = move.size_bytes; this->onEndCallbacks = std::move(move.onEndCallbacks); @@ -172,10 +144,6 @@ namespace tools { MemMapFileWriter(std::string filename) { // Open file in read/write mode, creating it if it doesn't already exist fd = ::open(filename.c_str(), O_RDWR | O_CREAT, (mode_t)0666); -#ifdef NO_MEMMAP - logging::entry(logging::level::debug) << "Caution: running with NO_MEMMAP, which emulates memmaps " - "for experimental testing" << std::endl; -#endif if(fd==-1) throw std::runtime_error("Failed to open file (reason: "+std::string(::strerror(errno))+")"); ::ftruncate(fd, 0); @@ -210,9 +178,14 @@ namespace tools { auto getMemMap(size_t n_elements) { auto region = MemMapRegion(fd,offset,n_elements); offset+=n_elements*sizeof(DataType); -#ifndef NO_MEMMAP - ::lseek(fd, offset, SEEK_SET); -#endif + region.onFinish([this]() { + // leave file position as though we just finished writing this in a "normal" way + // this is important for the fortran file writing below. Note that although the + // end of the fortran block could in principle be written before the memmap is + // created, this non-sequential writing caused erratically bad performance with + // network file systems + ::lseek(this->fd, this->offset, SEEK_SET); + }); return region; }