Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

string: add length management functions #243

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 113 additions & 33 deletions include/cista/containers/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <string_view>

#include "cista/containers/ptr.h"
#include "cista/endian/detection.h"
#include "cista/exception.h"
#include "cista/type_traits.h"

Expand Down Expand Up @@ -75,9 +76,12 @@ struct generic_string {
friend CharT* end(generic_string& s) { return s.end(); }

bool is_short() const noexcept { return s_.is_short_; }
// True when the string is in heap (non-short) mode and its stored heap
// capacity is non-zero. A short string is never reported as self-allocated.
bool is_self_allocated() const noexcept {
  if (is_short()) {
    return false;
  }
  return h_.capacity_ != 0;
}

void reset() noexcept {
if (!h_.is_short_ && h_.ptr_ != nullptr && h_.self_allocated_) {
if (is_self_allocated()) {
std::free(data());
}
h_ = heap{};
Expand All @@ -96,25 +100,16 @@ struct generic_string {
static constexpr msize_t short_length_limit = 15U / sizeof(CharT);

void set_owning(CharT const* str, msize_t const len) {
reset();
if (str == nullptr || len == 0U) {
return;
return reset();
}
s_.is_short_ = (len <= short_length_limit);
if (s_.is_short_) {
std::memcpy(s_.s_, str, len * sizeof(CharT));
for (auto i = len; i < short_length_limit; ++i) {
s_.s_[i] = 0;
}
} else {
h_.ptr_ = static_cast<CharT*>(std::malloc(len * sizeof(CharT)));
if (h_.ptr_ == nullptr) {
throw_exception(std::bad_alloc{});
}
if (capacity() < len) {
internal_change_capacity(len);
}
if (!is_short()) {
h_.size_ = len;
h_.self_allocated_ = true;
std::memcpy(data(), str, len * sizeof(CharT));
}
std::memcpy(data(), str, len * sizeof(CharT));
}

void set_non_owning(std::basic_string<CharT> const& v) {
Expand All @@ -137,8 +132,7 @@ struct generic_string {
return set_owning(str, len);
}

h_.is_short_ = false;
h_.self_allocated_ = false;
h_ = heap{};
h_.ptr_ = str;
h_.size_ = len;
}
Expand All @@ -154,8 +148,7 @@ struct generic_string {
} else {
if (!s.is_short()) {
h_.ptr_ = s.h_.ptr_;
s.h_.ptr_ = nullptr;
s.h_.size_ = 0U;
s.h_ = heap{};
}
}
}
Expand All @@ -164,16 +157,71 @@ struct generic_string {
if (&s == this) {
return;
}
reset();
if (s.is_short()) {
std::memcpy(static_cast<void*>(this), &s, sizeof(s));
} else if (s.h_.self_allocated_) {
if (s.is_short() || s.is_self_allocated()) {
set_owning(s.data(), s.size());
} else {
set_non_owning(s.data(), s.size());
}
}

// Re-sizes the character storage so that it can hold `new_capacity`
// characters. The first min(size(), new_capacity) characters are kept and the
// remainder of the new storage is zero-filled.
//
//  * new_capacity == 0: the string is reset().
//  * new_capacity <= short_length_limit: the contents are moved into the
//    inline (short) buffer; a non-short string is reset() first.
//  * otherwise: heap storage is used, with the capacity rounded up to the next
//    multiple of 256. A self-allocated buffer is resized with std::realloc;
//    any other storage is copied into a fresh allocation.
//
// Allocation failure, or a request too large to be rounded up without
// overflowing msize_t, is reported via throw_exception(std::bad_alloc{}).
void internal_change_capacity(msize_t new_capacity) {
  // Copies `size` characters from `src` to `dest` (skipped when they already
  // alias) and zeroes the remaining `capacity - size` characters of `dest`.
  auto initialize_buffer = [](CharT* dest, msize_t capacity, CharT const* src,
                              msize_t size) -> void {
    if (size && dest != src) {
      std::memcpy(dest, src, size * sizeof(CharT));
    }
    std::memset(dest + size, 0, (capacity - size) * sizeof(CharT));
  };
  // Builds a heap descriptor with a (re)allocated buffer of at least `new_cap`
  // characters. `cur_buf` is handed to std::realloc, so it must be either
  // nullptr or a buffer owned by this string.
  auto make_heap = [](CharT* cur_buf, msize_t new_cap) -> heap {
    // Largest multiple of 256 representable in msize_t. Without this check
    // the round-up below would wrap around to 0 for huge requests, and
    // std::realloc with size 0 is implementation-defined: it may release
    // `cur_buf` while the string still refers to it.
    constexpr auto max_cap = static_cast<msize_t>(~msize_t{0xFF});
    if (new_cap > max_cap) {
      throw_exception(std::bad_alloc{});
    }
    new_cap = (new_cap + msize_t{0xFF}) & ~msize_t{0xFF};
    heap h{};
    // capacity_ overlays is_short_ in the heap union: a multiple of 256
    // (little-endian) or the value shifted right by 8 (big-endian) leaves
    // the byte shared with is_short_ at zero.
#ifdef CISTA_LITTLE_ENDIAN
    h.capacity_ = new_cap;
#else
    h.capacity_ = new_cap >> 8;
#endif
    h.ptr_ =
        static_cast<CharT*>(std::realloc(cur_buf, new_cap * sizeof(CharT)));
    if (!h.ptr_) {
      throw_exception(std::bad_alloc{});
    }
    return h;
  };

  if (new_capacity == 0) {
    reset();
    return;
  }
  msize_t new_size = std::min(size(), new_capacity);
  if (new_capacity <= short_length_limit) {
    // Stage the characters in a temporary inline buffer first: reset() below
    // may free the memory that data() currently points to.
    stack s{};
    initialize_buffer(s.s_, short_length_limit, data(), new_size);
    if (!is_short()) {
      reset();
    }
    s_ = s;
  } else {
    heap h{};
    if (is_self_allocated()) {
      // realloc preserves the old contents, so only the tail needs zeroing
      // (initialize_buffer skips the copy because dest == src).
      h = make_heap(data(), new_capacity);
      initialize_buffer(const_cast<CharT*>(h.ptr()), h.capacity(), h.ptr(),
                        new_size);
    } else {
      // Inline or not self-allocated storage: allocate fresh memory and copy.
      h = make_heap(nullptr, new_capacity);
      initialize_buffer(const_cast<CharT*>(h.ptr()), h.capacity(), data(),
                        new_size);
    }
    h.size_ = new_size;
    h_ = h;
  }
}
// Number of characters the current storage can hold: short_length_limit for
// a short string, otherwise the capacity recorded in the heap descriptor.
constexpr msize_t capacity() const noexcept {
  return is_short() ? short_length_limit : h_.capacity();
}

bool empty() const noexcept { return size() == 0U; }
std::basic_string_view<CharT> view() const noexcept {
return {data(), size()};
Expand Down Expand Up @@ -331,11 +379,7 @@ struct generic_string {
}

CharT const* internal_data() const noexcept {
if constexpr (std::is_pointer_v<Ptr>) {
return is_short() ? s_.s_ : h_.ptr_;
} else {
return is_short() ? s_.s_ : h_.ptr_.get();
}
return is_short() ? s_.s_ : h_.ptr();
}

CharT* data() noexcept { return const_cast<CharT*>(internal_data()); }
Expand All @@ -352,7 +396,7 @@ struct generic_string {
}

generic_string& erase(msize_t const pos, msize_t const n) {
if (!is_short() && !h_.self_allocated_) {
if (!is_short() && !is_self_allocated()) {
set_owning(view());
}
auto const size_before = size();
Expand Down Expand Up @@ -424,11 +468,27 @@ struct generic_string {
}

struct heap {
bool is_short_{false};
bool self_allocated_{false};
std::uint16_t __fill__{0};
union {
bool is_short_;
std::uint32_t capacity_{0};
};
std::uint32_t size_{0};
Ptr ptr_{nullptr};

// Decodes the stored capacity_ field: returned unchanged when
// CISTA_LITTLE_ENDIAN is defined, shifted left by 8 bits otherwise.
std::uint32_t capacity() const noexcept {
#ifdef CISTA_LITTLE_ENDIAN
  constexpr unsigned shift = 0U;
#else
  constexpr unsigned shift = 8U;
#endif
  return capacity_ << shift;
}
// Returns the heap buffer as a raw character pointer: ptr_ itself when Ptr is
// a plain pointer type, otherwise the result of ptr_.get().
CharT const* ptr() const noexcept {
  if constexpr (!std::is_pointer_v<Ptr>) {
    return ptr_.get();
  } else {
    return ptr_;
  }
}
};

struct stack {
Expand All @@ -448,6 +508,7 @@ struct generic_string {
template <typename Ptr>
struct basic_string : public generic_string<Ptr> {
using base = generic_string<Ptr>;
using msize_t = typename base::msize_t;
using CharT = typename base::CharT;

using base::base;
Expand Down Expand Up @@ -496,6 +557,25 @@ struct basic_string : public generic_string<Ptr> {
base::set_owning(s);
return *this;
}

// Changes the length of the string to `new_size` characters.
//
// Storage is grown through internal_change_capacity() when `new_size` exceeds
// the current capacity. Characters cut off by shrinking are overwritten with
// zeros. The explicit size field is only written for non-short strings; no
// size field is written here for short strings.
void resize(msize_t new_size) {
  if (new_size > base::capacity()) {
    base::internal_change_capacity(new_size);
  } else if (!base::is_short() && !base::is_self_allocated()) {
    // A non-short string that is not self-allocated reports capacity 0, so
    // this branch is only reached with new_size == 0. Its buffer is not ours
    // to write to, so drop the reference instead of zero-filling it. This
    // also keeps "size == 0 <=> ptr == nullptr" intact.
    base::reset();
    return;
  }
  auto const old_size = base::size();
  if (new_size < old_size) {
    std::memset(base::data() + new_size, 0,
                (old_size - new_size) * sizeof(CharT));
  }
  if (!base::is_short()) {
    base::h_.size_ = new_size;
  }
}
// Ensures the storage can hold at least `cap` characters. Does nothing when
// the current capacity is already sufficient.
void reserve(msize_t cap) {
  auto const current = base::capacity();
  if (cap <= current) {
    return;
  }
  base::internal_change_capacity(cap);
}
// Requests storage sized for the current length by passing size() to
// internal_change_capacity().
void shrink_to_fit() {
  auto const current_size = base::size();
  base::internal_change_capacity(current_size);
}
};

template <typename Ptr>
Expand Down
24 changes: 13 additions & 11 deletions include/cista/serialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,20 +202,23 @@ void serialize(Ctx& c,
template <typename Ctx, typename Ptr>
void serialize(Ctx& c, generic_string<Ptr> const* origin, offset_t const pos) {
using Type = generic_string<Ptr>;
auto str_convert_endian = [](Ctx& ctx, offset_t const start,
typename Type::CharT const* str,
using CharT = typename Type::CharT;
auto str_convert_endian = [](Ctx& ctx, offset_t const start, CharT const* str,
offset_t const size) -> void {
if constexpr (sizeof(typename Type::CharT) > 1) {
if constexpr (sizeof(CharT) > 1) {
for (offset_t i = 0; i < size; ++i) {
ctx.write(
start + i * static_cast<offset_t>(sizeof(typename Type::CharT)),
convert_endian<Ctx::MODE>(str[i]));
ctx.write(start + i * static_cast<offset_t>(sizeof(CharT)),
convert_endian<Ctx::MODE>(str[i]));
}
}
};

if (origin->is_short()) {
str_convert_endian(c, pos + cista_member_offset(Type, s_.s_), origin->s_.s_,
if (origin->size() <= Type::short_length_limit) {
Type short_str;
short_str.set_owning(origin->data(), origin->size());
c.write(pos, short_str);
str_convert_endian(c, pos + cista_member_offset(Type, s_.s_),
origin->data(),
static_cast<offset_t>(Type::short_length_limit));
return;
}
Expand All @@ -236,7 +239,7 @@ void serialize(Ctx& c, generic_string<Ptr> const* origin, offset_t const pos) {
: start - cista_member_offset(Type, h_.ptr_) - pos));
c.write(pos + cista_member_offset(Type, h_.size_),
convert_endian<Ctx::MODE>(origin->h_.size_));
c.write(pos + cista_member_offset(Type, h_.self_allocated_), false);
c.write(pos + cista_member_offset(Type, h_.capacity_), std::uint32_t{0});
}

template <typename Ctx, typename T, typename SizeType,
Expand Down Expand Up @@ -829,8 +832,7 @@ void check_state(Ctx const& c, generic_string<Ptr>* el) {
if (!el->is_short()) {
c.check_ptr(el->h_.ptr_,
el->h_.size_ * sizeof(typename generic_string<Ptr>::CharT));
c.check_bool(el->h_.self_allocated_);
c.require(!el->h_.self_allocated_, "string self-allocated");
c.require(!el->is_self_allocated(), "string self-allocated");
c.require((el->h_.size_ == 0) == (el->h_.ptr_ == nullptr),
"str size=0 <=> ptr=0");
}
Expand Down
75 changes: 75 additions & 0 deletions test/cstring_serialize_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,78 @@ TEST_CASE("u32string serialization endian long") {

CHECK(*serialized_be == U32STR_LONG_CORNER_CASE);
}

// Checks the capacity reported by deserialized strings and the effect of
// shrink_to_fit() on them, for a short and a long fixture of each character
// type.
TEST_CASE_TEMPLATE("string serialization capacity", StrT, cista::raw::string,
                   u16string, u32string) {
  using CharT = typename StrT::CharT;
  // Selects the short fixture whose character width matches CharT.
  auto get_short = []() -> CharT const* {
    void const* ptr = nullptr;  // stays null if no character width matches
    switch (sizeof(CharT)) {
      case sizeof(char): ptr = SHORT_STR; break;
      case sizeof(char16_t): ptr = U16STR_SHORT; break;
      case sizeof(char32_t): ptr = U32STR_SHORT; break;
      default: break;
    }
    return static_cast<CharT const*>(ptr);
  };
  // Selects the long fixture whose character width matches CharT.
  auto get_long = []() -> CharT const* {
    void const* ptr = nullptr;  // stays null if no character width matches
    switch (sizeof(CharT)) {
      case sizeof(char): ptr = LONG_STR; break;
      case sizeof(char16_t): ptr = U16STR_LONG; break;
      case sizeof(char32_t): ptr = U32STR_LONG; break;
      default: break;
    }
    return static_cast<CharT const*>(ptr);
  };

  StrT s_s = get_short(), s_l = get_long();
  cista::byte_buf buf_s = cista::serialize(s_s), buf_l = cista::serialize(s_l);
  StrT *serialized_s = cista::deserialize<StrT>(buf_s),
       *serialized_l = cista::deserialize<StrT>(buf_l);
  CharT const *ptr_s = serialized_s->data(), *ptr_l = serialized_l->data();

  // Straight after deserialization: the short string has the inline capacity,
  // the long string reports capacity 0.
  CHECK(serialized_s->capacity() == StrT::short_length_limit);
  CHECK(serialized_l->capacity() == 0);

  serialized_s->shrink_to_fit();
  serialized_l->shrink_to_fit();

  // The short string keeps its storage; the long string now reports capacity
  // 256 and a different data pointer. Contents are unchanged in both cases.
  CHECK(serialized_s->capacity() == StrT::short_length_limit);
  CHECK(serialized_l->capacity() == 256);
  CHECK(ptr_s == serialized_s->data());
  CHECK(ptr_l != serialized_l->data());
  CHECK(*serialized_s == get_short());
  CHECK(*serialized_l == get_long());

  // NOTE(review): destructors are invoked by hand, presumably because nothing
  // else destroys the deserialized objects and shrink_to_fit() may have
  // allocated memory for them - confirm.
  serialized_s->~StrT();
  serialized_l->~StrT();
}

// Checks that a string which is not in short mode but holds short content
// (after resize(256) followed by assignment of a short value) deserializes as
// a short string with the same content and size.
TEST_CASE_TEMPLATE("string serialization long as short", StrT,
                   cista::raw::string, u16string, u32string) {
  using CharT = typename StrT::CharT;
  // Selects the short fixture whose character width matches CharT.
  auto get_short = []() -> CharT const* {
    void const* ptr = nullptr;  // stays null if no character width matches
    switch (sizeof(CharT)) {
      case sizeof(char): ptr = SHORT_STR; break;
      case sizeof(char16_t): ptr = U16STR_SHORT; break;
      case sizeof(char32_t): ptr = U32STR_SHORT; break;
      default: break;
    }
    return static_cast<CharT const*>(ptr);
  };

  auto short_str = get_short();
  auto short_len = StrT::mstrlen(short_str);

  StrT s;
  s.resize(256);
  s = short_str;
  cista::byte_buf buf = cista::serialize(s);
  StrT* serialized = cista::deserialize<StrT>(buf);

  // The source string is not short, while the deserialized one is.
  CHECK(!s.is_short());
  CHECK(serialized->is_short());
  CHECK(s == short_str);
  CHECK(*serialized == short_str);
  CHECK(s.size() == short_len);
  CHECK(serialized->size() == short_len);
}
Loading