From 136b6454eae7b2af0c3bff213d723a9f843364c5 Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Thu, 26 Dec 2024 18:42:27 +0300 Subject: [PATCH 01/14] string: add length management functions --- include/cista/containers/string.h | 127 +++++++++++++++---- include/cista/serialization.h | 5 +- test/string_test.cc | 197 ++++++++++++++++++++++++++++++ 3 files changed, 305 insertions(+), 24 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index 5f0350eb..30e67e3f 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -8,6 +8,7 @@ #include #include "cista/containers/ptr.h" +#include "cista/endian/detection.h" #include "cista/exception.h" #include "cista/type_traits.h" @@ -75,9 +76,12 @@ struct generic_string { friend CharT* end(generic_string& s) { return s.end(); } bool is_short() const noexcept { return s_.is_short_; } + bool is_self_allocated() const noexcept { + return !is_short() && (h_.reserved_ != 0); + } void reset() noexcept { - if (!h_.is_short_ && h_.ptr_ != nullptr && h_.self_allocated_) { + if (is_self_allocated()) { std::free(data()); } h_ = heap{}; @@ -100,21 +104,11 @@ struct generic_string { if (str == nullptr || len == 0U) { return; } - s_.is_short_ = (len <= short_length_limit); - if (s_.is_short_) { - std::memcpy(s_.s_, str, len * sizeof(CharT)); - for (auto i = len; i < short_length_limit; ++i) { - s_.s_[i] = 0; - } - } else { - h_.ptr_ = static_cast(std::malloc(len * sizeof(CharT))); - if (h_.ptr_ == nullptr) { - throw_exception(std::bad_alloc{}); - } + internal_change_capacity(len); + if (!is_short()) { h_.size_ = len; - h_.self_allocated_ = true; - std::memcpy(data(), str, len * sizeof(CharT)); } + std::memcpy(data(), str, len * sizeof(CharT)); } void set_non_owning(std::basic_string const& v) { @@ -137,8 +131,7 @@ struct generic_string { return set_owning(str, len); } - h_.is_short_ = false; - h_.self_allocated_ = false; + h_ = heap{}; h_.ptr_ = str; h_.size_ = len; } @@ -167,13 +160,84 @@ struct generic_string { reset(); if (s.is_short()) { std::memcpy(static_cast(this), &s, sizeof(s)); - } else if (s.h_.self_allocated_) { + } else if (s.is_self_allocated()) { set_owning(s.data(), s.size()); } else { set_non_owning(s.data(), s.size()); } } + void internal_change_capacity(msize_t new_capacity) { + auto initialize_buffer = [](CharT* dest, msize_t capacity, CharT const* src, + msize_t size) -> void { + if (size && dest != src) { + std::memcpy(dest, src, size * sizeof(CharT)); + } + std::memset(dest + size, 0, (capacity - size) * sizeof(CharT)); + }; + auto make_heap = [](CharT* cur_buf, msize_t new_cap) -> heap { + heap h{}; + new_cap = (new_cap + 0xFFul) & 0xFF'FF'FF'00ul; +#ifdef CISTA_LITTLE_ENDIAN + h.reserved_ = new_cap; +#else + h.reserved_ = new_cap >> 8; +#endif + if (cur_buf) { + h.ptr_ = + static_cast(std::realloc(cur_buf, new_cap * sizeof(CharT))); + } else { + h.ptr_ = static_cast(std::malloc(new_cap * sizeof(CharT))); + } + if (!h.ptr_) { + throw_exception(std::bad_alloc{}); + } + return h; + }; + auto heap_ptr = [](heap& h) -> CharT* { + if constexpr (std::is_pointer_v) { + return const_cast(h.ptr_); + } else { + return const_cast(h.ptr_.get()); + } + }; + + if (new_capacity == 0) { + reset(); + return; + } + msize_t new_size = std::min(size(), new_capacity); + if (new_capacity <= short_length_limit) { + stack s{}; + initialize_buffer(s.s_, short_length_limit, data(), new_size); + if (!is_short()) { + reset(); + } + s_ = s; + } else { + heap h{}; + if (is_self_allocated()) { + h = make_heap(data(), new_capacity); + initialize_buffer(heap_ptr(h), h.reserved_, h.ptr_, new_size); + } else { + h = make_heap(nullptr, new_capacity); + initialize_buffer(heap_ptr(h), h.reserved_, data(), new_size); + } + h.size_ = new_size; + h_ = h; + } + } + constexpr msize_t capacity() const noexcept { + if (is_short()) { + return short_length_limit; + } +#ifdef CISTA_LITTLE_ENDIAN + return h_.reserved_; +#else + return h_.reserved_ << 8; +#endif + } + bool empty() const noexcept { return size() == 0U; } std::basic_string_view view() const noexcept { return {data(), size()}; @@ -352,7 +416,7 @@ struct generic_string { } generic_string& erase(msize_t const pos, msize_t const n) { - if (!is_short() && !h_.self_allocated_) { + if (!is_short() && !is_self_allocated()) { set_owning(view()); } auto const size_before = size(); @@ -424,9 +488,10 @@ struct generic_string { } struct heap { - bool is_short_{false}; - bool self_allocated_{false}; - std::uint16_t __fill__{0}; + union { + bool is_short_; + std::uint32_t reserved_{0}; + }; std::uint32_t size_{0}; Ptr ptr_{nullptr}; }; @@ -496,6 +561,26 @@ struct basic_string : public generic_string { base::set_owning(s); return *this; } + + void resize(msize_t new_size) { + if (new_size <= short_length_limit) { + internal_change_capacity(new_size); + return; + } + if (new_size > capacity()) { + internal_change_capacity(new_size); + } + if (new_size < h_.size_) { + std::memset(data() + new_size, 0, (h_.size_ - new_size) * sizeof(CharT)); + } + h_.size_ = new_size; + } + void reserve(msize_t cap) { + if (cap > capacity()) { + internal_change_capacity(cap); + } + } + void shrink_to_fit() { internal_change_capacity(size()); } }; template diff --git a/include/cista/serialization.h b/include/cista/serialization.h index 559960da..bb39c143 100644 --- a/include/cista/serialization.h +++ b/include/cista/serialization.h @@ -236,7 +236,7 @@ void serialize(Ctx& c, generic_string const* origin, offset_t const pos) { : start - cista_member_offset(Type, h_.ptr_) - pos)); c.write(pos + cista_member_offset(Type, h_.size_), convert_endian(origin->h_.size_)); - c.write(pos + cista_member_offset(Type, h_.self_allocated_), false); + c.write(pos + cista_member_offset(Type, h_.reserved_), 0ul); } template * el) { if (!el->is_short()) { c.check_ptr(el->h_.ptr_, el->h_.size_ * sizeof(typename generic_string::CharT)); - c.check_bool(el->h_.self_allocated_); - c.require(!el->h_.self_allocated_, "string self-allocated"); + c.require(!el->is_self_allocated(), "string self-allocated"); c.require((el->h_.size_ == 0) == (el->h_.ptr_ == nullptr), "str size=0 <=> ptr=0"); } diff --git a/test/string_test.cc b/test/string_test.cc index 5f9cfb05..c8892064 100644 --- a/test/string_test.cc +++ b/test/string_test.cc @@ -1,4 +1,6 @@ +#include #include +#include #include #include "doctest.h" @@ -441,3 +443,198 @@ TEST_CASE("u32string ends_with") { CHECK(s[3].ends_with(U'\U0001F4BB')); CHECK(!s[3].ends_with(U'A')); } + +TEST_SUITE("string internal_change_capacity") { + TEST_CASE("string internal_change_capacity") { + string s{}; + std::vector> cap = { + {0, 0}, {1, 15}, {14, 15}, {15, 15}, {16, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 256}, {15, 15}, {10, 15}}; + + for (auto c : cap) { + s.internal_change_capacity(c.first); + std::fill(s.data(), s.data() + c.first, 'A'); + CHECK(s.capacity() == c.second); + CHECK(std::all_of(s.data() + c.first, s.data() + c.second, + [](char a) { return a == 0; })); + } + } + + TEST_CASE("u16string internal_change_capacity") { + u16string s{}; + std::vector> cap = { + {0, 0}, {1, 7}, {6, 7}, {7, 7}, {8, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 256}, {7, 7}, {3, 7}}; + + for (auto c : cap) { + s.internal_change_capacity(c.first); + std::fill(s.data(), s.data() + c.first, u'A'); + CHECK(s.capacity() == c.second); + CHECK(std::all_of(s.data() + c.first, s.data() + c.second, + [](char16_t a) { return a == 0; })); + } + } + + TEST_CASE("u32string internal_change_capacity") { + u32string s{}; + std::vector> cap = { + {0, 0}, {1, 3}, {2, 3}, {3, 3}, {4, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 256}, {3, 3}, {1, 3}}; + + for (auto c : cap) { + s.internal_change_capacity(c.first); + std::fill(s.data(), s.data() + c.first, U'A'); + CHECK(s.capacity() == c.second); + CHECK(std::all_of(s.data() + c.first, s.data() + c.second, + [](char32_t a) { return a == 0; })); + } + } +} + +TEST_SUITE("string resize") { + TEST_CASE("string resize") { + string str{}; + std::vector size = {0, 1, 14, 15, 16, 128, + 255, 256, 257, 256, 15, 10}; + + for (auto s : size) { + auto old_size = str.size(); + str.resize(s); + if (s > 15) { + CHECK(str.size() == s); + } + if (s > old_size) { + CHECK(std::all_of(str.begin(), str.begin() + old_size, + [](char a) { return a == 'A'; })); + } + CHECK(std::all_of(str.data() + str.size(), str.data() + str.capacity(), + [](char a) { return a == 0; })); + std::fill(str.begin(), str.begin() + s, 'A'); + } + } + + TEST_CASE("u16string resize") { + u16string str{}; + std::vector size = {0, 1, 6, 7, 8, 128, 255, 256, 257, 256, 7, 3}; + + for (auto s : size) { + auto old_size = str.size(); + str.resize(s); + if (s > 7) { + CHECK(str.size() == s); + } + if (s > old_size) { + CHECK(std::all_of(str.begin(), str.begin() + old_size, + [](char16_t a) { return a == u'A'; })); + } + CHECK(std::all_of(str.data() + str.size(), str.data() + str.capacity(), + [](char16_t a) { return a == 0; })); + std::fill(str.begin(), str.begin() + s, u'A'); + } + } + + TEST_CASE("u32string resize") { + u32string str{}; + std::vector size = {0, 1, 2, 3, 4, 128, 255, 256, 257, 256, 3, 1}; + + for (auto s : size) { + auto old_size = str.size(); + str.resize(s); + if (s > 3) { + CHECK(str.size() == s); + } + if (s > old_size) { + CHECK(std::all_of(str.begin(), str.begin() + old_size, + [](char32_t a) { return a == U'A'; })); + } + CHECK(std::all_of(str.data() + str.size(), str.data() + str.capacity(), + [](char32_t a) { return a == 0; })); + std::fill(str.begin(), str.begin() + s, U'A'); + } + } +} + +TEST_SUITE("string reserve") { + TEST_CASE("string reserve") { + string str{}; + std::vector> cap = { + {0, 0}, {1, 15}, {14, 15}, {15, 15}, {16, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 512}, {15, 512}, {10, 512}}; + + for (auto c : cap) { + str.reserve(c.first); + CHECK(str.capacity() == c.second); + } + } + + TEST_CASE("u16string reserve") { + u16string str{}; + std::vector> cap = { + {0, 0}, {1, 7}, {6, 7}, {7, 7}, {8, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 512}, {7, 512}, {3, 512}}; + + for (auto c : cap) { + str.reserve(c.first); + CHECK(str.capacity() == c.second); + } + } + + TEST_CASE("u32string reserve") { + u32string str{}; + std::vector> cap = { + {0, 0}, {1, 3}, {2, 3}, {3, 3}, {4, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 512}, {3, 512}, {1, 512}}; + + for (auto c : cap) { + str.reserve(c.first); + CHECK(str.capacity() == c.second); + } + } +} + +TEST_SUITE("string shrink_to_fit") { + TEST_CASE("string shrink_to_fit") { + string str{}; + std::vector> cap = { + {0, 0}, {1, 15}, {14, 15}, {15, 15}, {16, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 256}, {15, 15}, {10, 15}}; + + for (auto c : cap) { + str.reserve(1024); + str.resize(c.first); + std::fill(str.data(), str.data() + c.first, 'A'); + str.shrink_to_fit(); + CHECK(str.capacity() == c.second); + } + } + + TEST_CASE("u16string shrink_to_fit") { + u16string str{}; + std::vector> cap = { + {0, 0}, {1, 7}, {6, 7}, {7, 7}, {8, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 256}, {7, 7}, {3, 7}}; + + for (auto c : cap) { + str.reserve(1024); + str.resize(c.first); + std::fill(str.data(), str.data() + c.first, u'A'); + str.shrink_to_fit(); + CHECK(str.capacity() == c.second); + } + } + + TEST_CASE("u32string shrink_to_fit") { + u32string str{}; + std::vector> cap = { + {0, 0}, {1, 3}, {2, 3}, {3, 3}, {4, 256}, {128, 256}, + {255, 256}, {256, 256}, {257, 512}, {256, 256}, {3, 3}, {1, 3}}; + + for (auto c : cap) { + str.reserve(1024); + str.resize(c.first); + std::fill(str.data(), str.data() + c.first, U'A'); + str.shrink_to_fit(); + CHECK(str.capacity() == c.second); + } + } +} \ No newline at end of file From 67384affd82c0c4c0744f776ea9ede4a2affc13d Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Thu, 26 Dec 2024 19:11:07 +0300 Subject: [PATCH 02/14] Fix clang compilation error --- include/cista/containers/string.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index 30e67e3f..0cfeec42 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -513,6 +513,7 @@ struct generic_string { template struct basic_string : public generic_string { using base = generic_string; + using msize_t = typename base::msize_t; using CharT = typename base::CharT; using base::base; @@ -563,24 +564,25 @@ struct basic_string : public generic_string { } void resize(msize_t new_size) { - if (new_size <= short_length_limit) { - internal_change_capacity(new_size); + if (new_size <= base::short_length_limit) { + base::internal_change_capacity(new_size); return; } - if (new_size > capacity()) { - internal_change_capacity(new_size); + if (new_size > base::capacity()) { + base::internal_change_capacity(new_size); } - if (new_size < h_.size_) { - std::memset(data() + new_size, 0, (h_.size_ - new_size) * sizeof(CharT)); + if (new_size < base::h_.size_) { + std::memset(data() + new_size, 0, + (base::h_.size_ - new_size) * sizeof(CharT)); } - h_.size_ = new_size; + base::h_.size_ = new_size; } void reserve(msize_t cap) { - if (cap > capacity()) { - internal_change_capacity(cap); + if (cap > base::capacity()) { + base::internal_change_capacity(cap); } } - void shrink_to_fit() { internal_change_capacity(size()); } + void shrink_to_fit() { base::internal_change_capacity(size()); } }; template From ba706808b4141ec797531871ec30c2ae8b899396 Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Thu, 26 Dec 2024 19:14:27 +0300 Subject: [PATCH 03/14] Fix clang 'undeclared identifier' --- include/cista/containers/string.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index 0cfeec42..1e78efc7 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -572,7 +572,7 @@ struct basic_string : public generic_string { base::internal_change_capacity(new_size); } if (new_size < base::h_.size_) { - std::memset(data() + new_size, 0, + std::memset(base::data() + new_size, 0, (base::h_.size_ - new_size) * sizeof(CharT)); } base::h_.size_ = new_size; @@ -582,7 +582,7 @@ struct basic_string : public generic_string { base::internal_change_capacity(cap); } } - void shrink_to_fit() { base::internal_change_capacity(size()); } + void shrink_to_fit() { base::internal_change_capacity(base::size()); } }; template From ed51d8f8b80c4f72e07e42ab060b4232c3f7b281 Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Sat, 28 Dec 2024 22:05:58 +0300 Subject: [PATCH 04/14] Fix string serialization on Linux --- include/cista/serialization.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/cista/serialization.h b/include/cista/serialization.h index bb39c143..8d061076 100644 --- a/include/cista/serialization.h +++ b/include/cista/serialization.h @@ -236,7 +236,7 @@ void serialize(Ctx& c, generic_string const* origin, offset_t const pos) { : start - cista_member_offset(Type, h_.ptr_) - pos)); c.write(pos + cista_member_offset(Type, h_.size_), convert_endian(origin->h_.size_)); - c.write(pos + cista_member_offset(Type, h_.reserved_), 0ul); + c.write(pos + cista_member_offset(Type, h_.reserved_), std::uint32_t{0}); } template Date: Sat, 28 Dec 2024 22:48:27 +0300 Subject: [PATCH 05/14] string improve resize tests --- test/string_test.cc | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/test/string_test.cc b/test/string_test.cc index c8892064..981011fa 100644 --- a/test/string_test.cc +++ b/test/string_test.cc @@ -503,10 +503,8 @@ TEST_SUITE("string resize") { if (s > 15) { CHECK(str.size() == s); } - if (s > old_size) { - CHECK(std::all_of(str.begin(), str.begin() + old_size, - [](char a) { return a == 'A'; })); - } + CHECK(std::all_of(str.begin(), str.begin() + std::min(old_size, s), + [](char a) { return a == 'A'; })); CHECK(std::all_of(str.data() + str.size(), str.data() + str.capacity(), [](char a) { return a == 0; })); std::fill(str.begin(), str.begin() + s, 'A'); @@ -523,10 +521,8 @@ TEST_SUITE("string resize") { if (s > 7) { CHECK(str.size() == s); } - if (s > old_size) { - CHECK(std::all_of(str.begin(), str.begin() + old_size, - [](char16_t a) { return a == u'A'; })); - } + CHECK(std::all_of(str.begin(), str.begin() + std::min(old_size, s), + [](char16_t a) { return a == u'A'; })); CHECK(std::all_of(str.data() + str.size(), str.data() + str.capacity(), [](char16_t a) { return a == 0; })); std::fill(str.begin(), str.begin() + s, u'A'); @@ -543,10 +539,8 @@ TEST_SUITE("string resize") { if (s > 3) { CHECK(str.size() == s); } - if (s > old_size) { - CHECK(std::all_of(str.begin(), str.begin() + old_size, - [](char32_t a) { return a == U'A'; })); - } + CHECK(std::all_of(str.begin(), str.begin() + std::min(old_size, s), + [](char32_t a) { return a == U'A'; })); CHECK(std::all_of(str.data() + str.size(), str.data() + str.capacity(), [](char32_t a) { return a == 0; })); std::fill(str.begin(), str.begin() + s, U'A'); From 7eff551c62093f0408f1c8fa0c344583e5393cef Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Sat, 28 Dec 2024 22:50:29 +0300 Subject: [PATCH 06/14] string: fix types --- include/cista/containers/string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index 1e78efc7..b02a276a 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -177,7 +177,7 @@ struct generic_string { }; auto make_heap = [](CharT* cur_buf, msize_t new_cap) -> heap { heap h{}; - new_cap = (new_cap + 0xFFul) & 0xFF'FF'FF'00ul; + new_cap = (new_cap + msize_t{0xFF}) & msize_t{0xFF'FF'FF'00}; #ifdef CISTA_LITTLE_ENDIAN h.reserved_ = new_cap; #else From 37f2235857e0c64a5dd5c7d906bf250dd689ca3b Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Fri, 17 Jan 2025 05:00:07 +0300 Subject: [PATCH 07/14] generic_string: fix internal_change_capacity behaviour on big-endian systems --- include/cista/containers/string.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index b02a276a..d3c5a7bf 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -218,10 +218,10 @@ struct generic_string { heap h{}; if (is_self_allocated()) { h = make_heap(data(), new_capacity); - initialize_buffer(heap_ptr(h), h.reserved_, h.ptr_, new_size); + initialize_buffer(heap_ptr(h), h.capacity(), h.ptr_, new_size); } else { h = make_heap(nullptr, new_capacity); - initialize_buffer(heap_ptr(h), h.reserved_, data(), new_size); + initialize_buffer(heap_ptr(h), h.capacity(), data(), new_size); } h.size_ = new_size; h_ = h; @@ -494,6 +494,14 @@ struct generic_string { }; std::uint32_t size_{0}; Ptr ptr_{nullptr}; + + std::uint32_t capacity() const noexcept { +#ifdef CISTA_LITTLE_ENDIAN + return reserved_; +#else + return reserved_ << 8; +#endif + } }; struct stack { From 42f3c0141b9b3675adfa6b5868f06a4add111f9f Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Fri, 17 Jan 2025 05:14:52 +0300 Subject: [PATCH 08/14] generic_string: refactor --- include/cista/containers/string.h | 54 +++++++++++++------------------ include/cista/serialization.h | 2 +- 2 files changed, 23 insertions(+), 33 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index d3c5a7bf..5606ac56 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -77,7 +77,7 @@ struct generic_string { bool is_short() const noexcept { return s_.is_short_; } bool is_self_allocated() const noexcept { - return !is_short() && (h_.reserved_ != 0); + return !is_short() && (h_.capacity_ != 0); } void reset() noexcept { @@ -176,31 +176,20 @@ struct generic_string { std::memset(dest + size, 0, (capacity - size) * sizeof(CharT)); }; auto make_heap = [](CharT* cur_buf, msize_t new_cap) -> heap { + new_cap = (new_cap + msize_t{0xFF}) & ~msize_t{0xFF}; heap h{}; - new_cap = (new_cap + msize_t{0xFF}) & msize_t{0xFF'FF'FF'00}; #ifdef CISTA_LITTLE_ENDIAN - h.reserved_ = new_cap; + h.capacity_ = new_cap; #else - h.reserved_ = new_cap >> 8; + h.capacity_ = new_cap >> 8; #endif - if (cur_buf) { - h.ptr_ = - static_cast(std::realloc(cur_buf, new_cap * sizeof(CharT))); - } else { - h.ptr_ = static_cast(std::malloc(new_cap * sizeof(CharT))); - } + h.ptr_ = + static_cast(std::realloc(cur_buf, new_cap * sizeof(CharT))); if (!h.ptr_) { throw_exception(std::bad_alloc{}); } return h; }; - auto heap_ptr = [](heap& h) -> CharT* { - if constexpr (std::is_pointer_v) { - return const_cast(h.ptr_); - } else { - return const_cast(h.ptr_.get()); - } - }; if (new_capacity == 0) { reset(); @@ -218,10 +207,12 @@ struct generic_string { heap h{}; if (is_self_allocated()) { h = make_heap(data(), new_capacity); - initialize_buffer(heap_ptr(h), h.capacity(), h.ptr_, new_size); + initialize_buffer(const_cast(h.ptr()), h.capacity(), h.ptr(), + new_size); } else { h = make_heap(nullptr, new_capacity); - initialize_buffer(heap_ptr(h), h.capacity(), data(), new_size); + initialize_buffer(const_cast(h.ptr()), h.capacity(), data(), + new_size); } h.size_ = new_size; h_ = h; @@ -231,11 +222,7 @@ struct generic_string { if (is_short()) { return short_length_limit; } -#ifdef CISTA_LITTLE_ENDIAN - return h_.reserved_; -#else - return h_.reserved_ << 8; -#endif + return h_.capacity(); } bool empty() const noexcept { return size() == 0U; } @@ -395,11 +382,7 @@ struct generic_string { } CharT const* internal_data() const noexcept { - if constexpr (std::is_pointer_v) { - return is_short() ? s_.s_ : h_.ptr_; - } else { - return is_short() ? s_.s_ : h_.ptr_.get(); - } + return is_short() ? s_.s_ : h_.ptr(); } CharT* data() noexcept { return const_cast(internal_data()); } @@ -490,18 +473,25 @@ struct generic_string { struct heap { union { bool is_short_; - std::uint32_t reserved_{0}; + std::uint32_t capacity_{0}; }; std::uint32_t size_{0}; Ptr ptr_{nullptr}; std::uint32_t capacity() const noexcept { #ifdef CISTA_LITTLE_ENDIAN - return reserved_; + return capacity_; #else - return reserved_ << 8; + return capacity_ << 8; #endif } + CharT const* ptr() const noexcept { + if constexpr (std::is_pointer_v) { + return ptr_; + } else { + return ptr_.get(); + } + } }; struct stack { diff --git a/include/cista/serialization.h b/include/cista/serialization.h index 8d061076..88657a0c 100644 --- a/include/cista/serialization.h +++ b/include/cista/serialization.h @@ -236,7 +236,7 @@ void serialize(Ctx& c, generic_string const* origin, offset_t const pos) { : start - cista_member_offset(Type, h_.ptr_) - pos)); c.write(pos + cista_member_offset(Type, h_.size_), convert_endian(origin->h_.size_)); - c.write(pos + cista_member_offset(Type, h_.reserved_), std::uint32_t{0}); + c.write(pos + cista_member_offset(Type, h_.capacity_), std::uint32_t{0}); } template Date: Wed, 22 Jan 2025 07:48:40 +0300 Subject: [PATCH 09/14] string test: add test of capacity correctness during serialization --- test/cstring_serialize_test.cc | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/test/cstring_serialize_test.cc b/test/cstring_serialize_test.cc index ac4e0f1c..31f06788 100644 --- a/test/cstring_serialize_test.cc +++ b/test/cstring_serialize_test.cc @@ -203,3 +203,45 @@ TEST_CASE("u32string serialization endian long") { CHECK(*serialized_be == U32STR_LONG_CORNER_CASE); } + +TEST_CASE_TEMPLATE("string serialization capacity", StrT, cista::raw::string, + u16string, u32string) { + using CharT = typename StrT::CharT; + auto get_short = []() -> CharT const* { + void const* ptr; + switch (sizeof(CharT)) { + case sizeof(char): ptr = SHORT_STR; break; + case sizeof(char16_t): ptr = U16STR_SHORT; break; + case sizeof(char32_t): ptr = U32STR_SHORT; break; + } + return static_cast(ptr); + }; + auto get_long = []() -> CharT const* { + void const* ptr; + switch (sizeof(CharT)) { + case sizeof(char): ptr = LONG_STR; break; + case sizeof(char16_t): ptr = U16STR_LONG; break; + case sizeof(char32_t): ptr = U32STR_LONG; break; + } + return static_cast(ptr); + }; + + StrT s_s = get_short(), s_l = get_long(); + cista::byte_buf buf_s = cista::serialize(s_s), buf_l = cista::serialize(s_l); + StrT *serialized_s = cista::deserialize(buf_s), + *serialized_l = cista::deserialize(buf_l); + CharT const *ptr_s = serialized_s->data(), *ptr_l = serialized_l->data(); + + CHECK(serialized_s->capacity() == StrT::short_length_limit); + CHECK(serialized_l->capacity() == 0); + + serialized_s->shrink_to_fit(); + serialized_l->shrink_to_fit(); + + CHECK(serialized_s->capacity() == StrT::short_length_limit); + CHECK(serialized_l->capacity() == 256); + CHECK(ptr_s == serialized_s->data()); + CHECK(ptr_l != serialized_l->data()); + CHECK(*serialized_s == get_short()); + CHECK(*serialized_l == get_long()); +} From 2cbc16369157a3989a8b9889367f7aae2dbd0e4f Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Wed, 22 Jan 2025 18:36:44 +0300 Subject: [PATCH 10/14] string test: fix memory leak due to missing destructor call --- test/cstring_serialize_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/cstring_serialize_test.cc b/test/cstring_serialize_test.cc index 31f06788..76b98a6a 100644 --- a/test/cstring_serialize_test.cc +++ b/test/cstring_serialize_test.cc @@ -244,4 +244,7 @@ TEST_CASE_TEMPLATE("string serialization capacity", StrT, cista::raw::string, CHECK(ptr_l != serialized_l->data()); CHECK(*serialized_s == get_short()); CHECK(*serialized_l == get_long()); + + serialized_s->~StrT(); + serialized_l->~StrT(); } From 8c32e032a432265aff6677d2f07b9c7d1c584df0 Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Fri, 24 Jan 2025 07:37:26 +0300 Subject: [PATCH 11/14] generic_string: avoid unnecessary reallocations on copy --- include/cista/containers/string.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index 5606ac56..d9d1fada 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -100,11 +100,12 @@ struct generic_string { static constexpr msize_t short_length_limit = 15U / sizeof(CharT); void set_owning(CharT const* str, msize_t const len) { - reset(); if (str == nullptr || len == 0U) { - return; + return reset(); + } + if (capacity() < len) { + internal_change_capacity(len); } - internal_change_capacity(len); if (!is_short()) { h_.size_ = len; } @@ -157,8 +158,8 @@ struct generic_string { if (&s == this) { return; } - reset(); if (s.is_short()) { + reset(); std::memcpy(static_cast(this), &s, sizeof(s)); } else if (s.is_self_allocated()) { set_owning(s.data(), s.size()); From 33de1e101a578437c019cf46e0436017af466a11 Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Mon, 3 Feb 2025 05:21:01 +0300 Subject: [PATCH 12/14] string: avoid shrinking on resize --- include/cista/containers/string.h | 12 +++++------- include/cista/serialization.h | 19 +++++++++++-------- test/cstring_serialize_test.cc | 30 ++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index d9d1fada..1765cef4 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -563,18 +563,16 @@ struct basic_string : public generic_string { } void resize(msize_t new_size) { - if (new_size <= base::short_length_limit) { - base::internal_change_capacity(new_size); - return; - } if (new_size > base::capacity()) { base::internal_change_capacity(new_size); } - if (new_size < base::h_.size_) { + if (new_size < base::size()) { std::memset(base::data() + new_size, 0, - (base::h_.size_ - new_size) * sizeof(CharT)); + (base::size() - new_size) * sizeof(CharT)); + } + if (!base::is_short()) { + base::h_.size_ = new_size; } - base::h_.size_ = new_size; } void reserve(msize_t cap) { if (cap > base::capacity()) { diff --git a/include/cista/serialization.h b/include/cista/serialization.h index 88657a0c..f190de93 100644 --- a/include/cista/serialization.h +++ b/include/cista/serialization.h @@ -202,20 +202,23 @@ void serialize(Ctx& c, template void serialize(Ctx& c, generic_string const* origin, offset_t const pos) { using Type = generic_string; - auto str_convert_endian = [](Ctx& ctx, offset_t const start, - typename Type::CharT const* str, + using CharT = typename Type::CharT; + auto str_convert_endian = [](Ctx& ctx, offset_t const start, CharT const* str, offset_t const size) -> void { - if constexpr (sizeof(typename Type::CharT) > 1) { + if constexpr (sizeof(CharT) > 1) { for (offset_t i = 0; i < size; ++i) { - ctx.write( - start + i * static_cast(sizeof(typename Type::CharT)), - convert_endian(str[i])); + ctx.write(start + i * static_cast(sizeof(CharT)), + convert_endian(str[i])); } } }; - if (origin->is_short()) { - str_convert_endian(c, pos + cista_member_offset(Type, s_.s_), origin->s_.s_, + if (origin->size() <= Type::short_length_limit) { + Type short_str; + short_str.set_owning(origin->data(), origin->size()); + c.write(pos, short_str); + str_convert_endian(c, pos + cista_member_offset(Type, s_.s_), + origin->data(), static_cast(Type::short_length_limit)); return; } diff --git a/test/cstring_serialize_test.cc b/test/cstring_serialize_test.cc index 76b98a6a..6928c6bf 100644 --- a/test/cstring_serialize_test.cc +++ b/test/cstring_serialize_test.cc @@ -248,3 +248,33 @@ TEST_CASE_TEMPLATE("string serialization capacity", StrT, cista::raw::string, serialized_s->~StrT(); serialized_l->~StrT(); } + +TEST_CASE_TEMPLATE("string serialization long as short", StrT, + cista::raw::string, u16string, u32string) { + using CharT = typename StrT::CharT; + auto get_short = []() -> CharT const* { + void const* ptr; + switch (sizeof(CharT)) { + case sizeof(char): ptr = SHORT_STR; break; + case sizeof(char16_t): ptr = U16STR_SHORT; break; + case sizeof(char32_t): ptr = U32STR_SHORT; break; + } + return static_cast(ptr); + }; + + auto short_str = get_short(); + auto short_len = StrT::mstrlen(short_str); + + StrT s; + s.resize(256); + s = short_str; + cista::byte_buf buf = cista::serialize(s); + StrT* serialized = cista::deserialize(buf); + + CHECK(!s.is_short()); + CHECK(serialized->is_short()); + CHECK(s == short_str); + CHECK(*serialized == short_str); + CHECK(s.size() == short_len); + CHECK(serialized->size() == short_len); +} From e8c31046b092541dc2bdb57690321fe01b82d521 Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Mon, 3 Feb 2025 13:37:51 +0300 Subject: [PATCH 13/14] generic_string: avoid deallocation on copy --- include/cista/containers/string.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index 1765cef4..892e9fe9 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -158,10 +158,7 @@ struct generic_string { if (&s == this) { return; } - if (s.is_short()) { - reset(); - std::memcpy(static_cast(this), &s, sizeof(s)); - } else if (s.is_self_allocated()) { + if (s.is_short() || s.is_self_allocated()) { set_owning(s.data(), s.size()); } else { set_non_owning(s.data(), s.size()); From 1c7c97ea65aafcc227d45c37bb3d9b09ca646cf6 Mon Sep 17 00:00:00 2001 From: freshFruict <75492585+freshFruict@users.noreply.github.com> Date: Tue, 4 Feb 2025 16:34:06 +0300 Subject: [PATCH 14/14] generic_string: fix incorrect capacity of moved-from string in offset mode --- include/cista/containers/string.h | 3 +-- test/string_test.cc | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/cista/containers/string.h b/include/cista/containers/string.h index 892e9fe9..679eaa3d 100644 --- a/include/cista/containers/string.h +++ b/include/cista/containers/string.h @@ -148,8 +148,7 @@ struct generic_string { } else { if (!s.is_short()) { h_.ptr_ = s.h_.ptr_; - s.h_.ptr_ = nullptr; - s.h_.size_ = 0U; + s.h_ = heap{}; } } } diff --git a/test/string_test.cc b/test/string_test.cc index 981011fa..efb745ee 100644 --- a/test/string_test.cc +++ b/test/string_test.cc @@ -119,6 +119,15 @@ TEST_CASE("string copy assign and copy construct") { CHECK(s2.view() == LONG_STR); } +TEST_CASE_TEMPLATE("string move", StrT, cista::raw::string, + cista::offset::string) { + StrT src = LONG_STR; + StrT dst = std::move(src); + CHECK(src.data() == nullptr); + CHECK(src.size() == 0); + CHECK(src.capacity() == 0); +} + TEST_CASE("string hash") { auto str = string{""}; auto h = cista::hash(str, cista::BASE_HASH);