From a96f72b0af1f0dde85ec6026ae2a86ccc858b9aa Mon Sep 17 00:00:00 2001
From: Mathieu Maret
Date: Wed, 15 Sep 2021 21:58:06 +0200
Subject: [PATCH] Add alternative memcpy implementation

---
 core/klibc.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 65 insertions(+), 5 deletions(-)

diff --git a/core/klibc.c b/core/klibc.c
index 1a928e2..e5c9537 100644
--- a/core/klibc.c
+++ b/core/klibc.c
@@ -16,14 +16,74 @@ int memcmp(const void *aptr, const void *bptr, size_t size)
     return 0;
 }
 
-void *memcpy(void *dst, const void *src, size_t n)
+// Inspired by https://interrupt.memfault.com/blog/memcpy-newlib-nano
+/* Nonzero if either X or Y is not aligned on a "long" boundary.  Only the
+   low address bits are examined, so an unsigned integer cast is enough.  */
+#define UNALIGNED(X, Y) \
+  (((size_t)(X) & (sizeof (long) - 1)) | ((size_t)(Y) & (sizeof (long) - 1)))
+
+/* How many bytes are copied each iteration of the 4X unrolled loop.  */
+#define BIGBLOCKSIZE    (sizeof (long) << 2)
+
+/* How many bytes are copied each iteration of the word copy loop.  */
+#define LITTLEBLOCKSIZE (sizeof (long))
+
+/* Threshold for punting to the byte copier.  */
+#define TOO_SMALL(LEN)  ((LEN) < BIGBLOCKSIZE)
+
+/*
+ * Copy len0 bytes from src0 to dst0 and return dst0.
+ *
+ * When both pointers are aligned on a "long" boundary and at least
+ * BIGBLOCKSIZE bytes are requested, the bulk is moved one long at a time
+ * (four per iteration, then one per iteration); whatever remains, or the
+ * whole buffer otherwise, is copied byte by byte.
+ */
+void *memcpy(void *dst0, const void *src0, size_t len0)
 {
-    char *dstChar       = dst;
-    const char *srcChar = src;
-    for (size_t i = 0; i < n; i++) {
+#if 0 /* Previous byte-by-byte implementation, compiled out.  */
+    char *dstChar       = dst0;
+    const char *srcChar = src0;
+    for (size_t i = 0; i < len0; i++) {
         *(dstChar++) = *(srcChar++);
     }
-    return dst;
+    return dst0;
+#else
+    char *dst       = dst0;
+    const char *src = src0;
+
+    /* If the size is small, or either SRC or DST is unaligned,
+       then punt into the byte copy loop.  This should be rare.  */
+    if (!TOO_SMALL(len0) && !UNALIGNED(src, dst)) {
+        long *aligned_dst       = (long *)dst;
+        const long *aligned_src = (const long *)src;
+
+        /* Copy 4X long words at a time if possible.  */
+        while (len0 >= BIGBLOCKSIZE) {
+            *aligned_dst++ = *aligned_src++;
+            *aligned_dst++ = *aligned_src++;
+            *aligned_dst++ = *aligned_src++;
+            *aligned_dst++ = *aligned_src++;
+            len0 -= BIGBLOCKSIZE;
+        }
+
+        /* Copy one long word at a time if possible.  */
+        while (len0 >= LITTLEBLOCKSIZE) {
+            *aligned_dst++ = *aligned_src++;
+            len0 -= LITTLEBLOCKSIZE;
+        }
+
+        /* Pick up any residual with a byte copier.  */
+        dst = (char *)aligned_dst;
+        src = (const char *)aligned_src;
+    }
+
+    while (len0--) {
+        *dst++ = *src++;
+    }
+
+    return dst0;
+#endif
 }
 
 void *memset(void *src, int c, size_t n)