Skip to content

Commit

Permalink
custom memmove implementation proposal. (#593)
Browse files Browse the repository at this point in the history
Mostly like memcpy, with optional bounds checking, but also capable of
handling overlapping ranges by copying in reverse when the destination
starts inside the source range.
  • Loading branch information
devnexen authored Nov 16, 2024
1 parent f7fe702 commit 01885f5
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 3 deletions.
48 changes: 48 additions & 0 deletions src/snmalloc/global/memcpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ namespace snmalloc
}
}

/**
 * Copy `len` bytes from `src` to `dst` in descending address order, in
 * `Size`-byte units.  Used for overlapping moves where the destination
 * starts inside the source range, so each byte must be read before it is
 * overwritten.
 *
 * Fix: the previous loop condition (`int64_t(i + Size) >= 0`, relying on
 * `i` wrapping below zero) performed one extra copy after the unit at
 * offset 0 had been handled — for Size == 1, `i` wraps to SIZE_MAX,
 * `i + Size` wraps back to 0, the test passes, and a byte is written at
 * `dst - 1`.  The count-down form below cannot underflow.
 *
 * NOTE(review): if `len` is not a multiple of `Size`, the lowest
 * `len % Size` bytes are not copied; the only current caller
 * instantiates this with Size == 1, where that remainder is zero.
 */
template<size_t Size, size_t PrefetchOffset = 0>
SNMALLOC_FAST_PATH_INLINE void
block_reverse_copy(void* dst, const void* src, size_t len)
{
  for (size_t i = len; i >= Size;)
  {
    i -= Size;
    copy_one<Size>(pointer_offset(dst, i), pointer_offset(src, i));
  }
}

/**
* Perform an overlapping copy of the end. This will copy one (potentially
* unaligned) `T` from the end of the source to the end of the destination.
Expand Down Expand Up @@ -459,4 +469,42 @@ namespace snmalloc
Arch::copy(dst, src, len);
return orig_dst;
}

/**
 * Overlap-safe counterpart of the checked memcpy above.  Semantics match
 * C `memmove`: copies `len` bytes from `src` to `dst`, correct even when
 * the ranges overlap, and returns `dst`.  `Checked`/`ReadsChecked` select
 * bounds checking of the destination/source against their heap
 * allocations.
 */
template<
  bool Checked,
  bool ReadsChecked = CheckReads,
  typename Arch = DefaultArch>
SNMALLOC_FAST_PATH_INLINE void*
memmove(void* dst, const void* src, size_t len)
{
  // memmove must hand back its first argument.
  auto* const result = dst;

  // Trivial moves (zero length, or identical pointers) need no copying
  // and no bounds checks; keep this the cheapest path.
  if (SNMALLOC_UNLIKELY(len == 0 || dst == src))
    return result;

  // Optionally verify both ranges lie inside their heap allocations.
  if (SNMALLOC_UNLIKELY(!check_bounds<(Checked && ReadsChecked)>(src, len)))
    return report_fatal_bounds_error(
      src, len, "memmove with source out of bounds of heap allocation");
  if (SNMALLOC_UNLIKELY(!check_bounds<Checked>(dst, len)))
    return report_fatal_bounds_error(
      dst, len, "memmove with destination out of bounds of heap allocation");

  // The destination starts inside [src, src + len) exactly when the
  // unsigned distance dst - src is below len.  If dst precedes src the
  // subtraction wraps to a huge value and the test fails, so the
  // optimised forward copy is safe in that case.
  const bool dst_inside_src = (address_cast(dst) - address_cast(src)) < len;

  if (dst_inside_src)
  {
    // Slow, safe byte-wise reverse copy: reads each byte before it can
    // be clobbered.  Deliberately unoptimised — the typical memmove use
    // case is shifting elements within one contiguous buffer.
    block_reverse_copy<1>(dst, src, len);
  }
  else
  {
    Arch::copy(dst, src, len);
  }

  return result;
}
} // namespace snmalloc
9 changes: 9 additions & 0 deletions src/snmalloc/override/memcpy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,13 @@ extern "C"
{
return snmalloc::memcpy<true>(dst, src, len);
}

/**
 * Snmalloc checked memmove: libc-compatible entry point that forwards to
 * snmalloc::memmove with bounds checking enabled (Checked = true).
 * Handles overlapping ranges and returns `dst`, as memmove requires.
 */
SNMALLOC_EXPORT void*
SNMALLOC_NAME_MANGLE(memmove)(void* dst, const void* src, size_t len)
{
  return snmalloc::memmove<true>(dst, src, len);
}
}
79 changes: 76 additions & 3 deletions src/test/func/memcpy/func-memcpy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,24 @@ extern "C" void abort()
* fills one with a well-known pattern, and then copies subsets of this at
* one-byte increments to a target. This gives us unaligned starts.
*/
template<bool overlap>
void check_size(size_t size)
{
START_TEST("checking {}-byte memcpy", size);
if constexpr (!overlap)
{
START_TEST("checking {}-byte memcpy", size);
}
else
{
START_TEST("checking {}-byte memmove", size);
}
auto* s = static_cast<unsigned char*>(my_malloc(size + 1));
auto* d = static_cast<unsigned char*>(my_malloc(size + 1));
d[size] = 0;
s[size] = 255;
for (size_t start = 0; start < size; start++)
{
void* ret;
unsigned char* src = s + start;
unsigned char* dst = d + start;
size_t sz = (size - start);
Expand All @@ -88,7 +97,14 @@ void check_size(size_t size)
{
dst[i] = 0;
}
void* ret = my_memcpy(dst, src, sz);
if constexpr (!overlap)
{
ret = my_memcpy(dst, src, sz);
}
else
{
ret = my_memmove(dst, src, sz);
}
EXPECT(ret == dst, "Return value should be {}, was {}", dst, ret);
for (size_t i = 0; i < sz; ++i)
{
Expand Down Expand Up @@ -147,6 +163,50 @@ void check_bounds(size_t size, size_t out_of_bounds)
my_free(d);
}

/**
 * Spot-checks of memmove within a single allocation: single-element
 * moves in both directions, and an exact self-move.
 *
 * Fix: the self-move previously asserted `ptr == s` where `ptr` had just
 * been assigned `s` and the memmove return value was discarded — a
 * vacuously true check.  Now the return value is captured and compared,
 * and the buffer contents are verified to survive the self-move.
 */
void check_overlaps1()
{
  size_t size = 16;
  START_TEST("memmove overlaps1");
  auto* s = static_cast<unsigned int*>(my_malloc(size * sizeof(unsigned int)));
  // Known pattern so a misplaced copy is detectable.
  for (size_t i = 0; i < size; ++i)
  {
    s[i] = static_cast<unsigned int>(i);
  }
  // Single-element moves: backwards (dst < src) and forwards (dst > src).
  my_memmove(&s[2], &s[4], sizeof(s[0]));
  EXPECT(s[2] == s[4], "overlap error: {} {}", s[2], s[4]);
  my_memmove(&s[15], &s[5], sizeof(s[0]));
  EXPECT(s[15] == s[5], "overlap error: {} {}", s[15], s[5]);
  // Exact self-move: must be a no-op and must return the destination.
  auto ptr = my_memmove(s, s, size * sizeof(s[0]));
  EXPECT(ptr == s, "overlap error: {} {}", ptr, s);
  for (size_t i = 0; i < size; ++i)
  {
    EXPECT(
      s[i] == static_cast<unsigned int>(i),
      "overlap error: {} {}",
      s[i],
      i);
  }
  my_free(s);
}

template<bool after>
void check_overlaps2(size_t size)
{
START_TEST("memmove overlaps2, size {}", size);
auto sz = size / 2;
auto offset = size / 2;
auto* s = static_cast<unsigned int*>(my_malloc(size * sizeof(unsigned int)));
for (size_t i = 0; i < size; ++i)
{
s[i] = static_cast<unsigned int>(i);
}
auto dst = after ? s + offset : s;
auto src = after ? s : s + offset;
size_t i = after ? 0 : offset;
size_t u = 0;
my_memmove(dst, src, sz * sizeof(unsigned int));
while (u < sz)
{
EXPECT(dst[u] == i, "overlap error: {} {}", dst[u], i);
u++;
i++;
}
my_free(s);
}

int main()
{
// Skip the checks that expect bounds checks to fail when we are not the
Expand Down Expand Up @@ -175,7 +235,20 @@ int main()
# endif
for (size_t x = 0; x < 2048; x++)
{
check_size(x);
check_size<false>(x);
}

for (size_t x = 0; x < 2048; x++)
{
check_size<true>(x);
}

check_overlaps1();

for (size_t x = 8; x < 256; x += 64)
{
check_overlaps2<false>(x);
check_overlaps2<true>(x);
}
}
#endif

0 comments on commit 01885f5

Please sign in to comment.