areaIntegration #1
62
core/klibc.c
62
core/klibc.c
@ -16,14 +16,66 @@ int memcmp(const void *aptr, const void *bptr, size_t size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *memcpy(void *dst, const void *src, size_t n)
|
||||
// Inspired by https://interrupt.memfault.com/blog/memcpy-newlib-nano
|
||||
/*
 * Nonzero if either X or Y is not aligned on a "long" boundary.
 * Both arguments are fully parenthesized so pointer expressions such as
 * `p + 1` are evaluated as pointers before being converted to an integer.
 */
#define UNALIGNED(X, Y) \
    ((((size_t)(X)) | ((size_t)(Y))) & (sizeof(long) - 1))

/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIGBLOCKSIZE (sizeof(long) << 2)

/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLEBLOCKSIZE (sizeof(long))

/* Threshold for punting to the byte copier. */
#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)

/**
 * Copy len0 bytes from src0 to dst0.
 *
 * When the length is at least BIGBLOCKSIZE and both pointers are aligned on
 * a "long" boundary, the bulk of the data is moved one long at a time (four
 * longs per iteration first, then single longs); whatever is left is copied
 * byte by byte. Otherwise the whole buffer is copied byte by byte.
 *
 * Bytes are always copied front to back; overlapping regions get no special
 * handling.
 *
 * NOTE(review): the word loops access the buffers through `long *`, which
 * presumably relies on the build not enforcing strict aliasing - confirm
 * against the compiler flags.
 *
 * @param dst0 destination buffer
 * @param src0 source buffer
 * @param len0 number of bytes to copy
 * @return dst0
 */
void *memcpy(void *dst0, const void *src0, size_t len0)
{
    char *dst = dst0;
    const char *src = src0;

    /* If the size is small, or either SRC or DST is unaligned,
       then punt into the byte copy loop. This should be rare. */
    if (!TOO_SMALL(len0) && !UNALIGNED(src, dst)) {
        long *aligned_dst = (long *)dst;
        const long *aligned_src = (const long *)src;

        /* Copy 4X long words at a time if possible. */
        while (len0 >= BIGBLOCKSIZE) {
            *aligned_dst++ = *aligned_src++;
            *aligned_dst++ = *aligned_src++;
            *aligned_dst++ = *aligned_src++;
            *aligned_dst++ = *aligned_src++;
            len0 -= BIGBLOCKSIZE;
        }

        /* Copy one long word at a time if possible. */
        while (len0 >= LITTLEBLOCKSIZE) {
            *aligned_dst++ = *aligned_src++;
            len0 -= LITTLEBLOCKSIZE;
        }

        /* Pick up any residual with a byte copier. */
        dst = (char *)aligned_dst;
        src = (const char *)aligned_src;
    }

    while (len0--) {
        *dst++ = *src++;
    }

    return dst0;
}
|
||||
|
||||
void *memset(void *src, int c, size_t n)
|
||||
|
@ -27,3 +27,10 @@ unsigned long usecs_to_jiffies(const unsigned int u)
|
||||
// This could overflow
|
||||
return (u * HZ) / 1000000L;
|
||||
}
|
||||
|
||||
#include <x86intrin.h>
|
||||
/**
 * Read the CPU time-stamp counter.
 *
 * Defined as an ordinary external function (no `inline`): in C99/C11 a plain
 * `inline` definition does not by itself provide an external symbol, so
 * callers in other translation units could fail to link.
 *
 * @return the value returned by the __rdtsc() intrinsic
 */
uint64_t read_cycle_counter(void)
{
    return __rdtsc();
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
#define HZ 100
|
||||
/*
|
||||
@ -50,3 +51,5 @@ unsigned int jiffies_to_msecs(const unsigned long j);
|
||||
unsigned int jiffies_to_usecs(const unsigned long j);
|
||||
unsigned long msecs_to_jiffies(const unsigned int m);
|
||||
unsigned long usecs_to_jiffies(const unsigned int u);
|
||||
|
||||
uint64_t read_cycle_counter();
|
||||
|
30
tests/test.c
30
tests/test.c
@ -11,6 +11,29 @@
|
||||
#include "synchro.h"
|
||||
#include "time.h"
|
||||
|
||||
/**
 * Time memcpy() for power-of-two lengths from 1 byte up to the size of the
 * test buffer (4096 bytes) and print the cycle counts measured with
 * read_cycle_counter() for each length.
 */
void testMemcpyPerf(void)
{
    struct test_struct {
        char data[4096];
    };
    // Two scratch buffers, declared `volatile` so the compiler won't optimize
    // away what we do with them.
    volatile struct test_struct dest, source;

    // Give the source defined contents so the copies never read
    // indeterminate bytes.
    for (size_t i = 0; i < sizeof(source.data); i++) {
        source.data[i] = (char)i;
    }

    printf("Test Memcpy perf\n");
    // run through powers-of-two memcpy's, printing stats for each test
    for (size_t len = 1; len <= sizeof(dest); len <<= 1) {
        uint64_t start = read_cycle_counter(); // << Start count
        memcpy((void *)&dest, (void *)&source, len);
        uint64_t stop = read_cycle_counter(); // << Stop count

        // Subtract at full width, then keep the elapsed count (and the
        // division below) in 32 bits, as the original code did.
        uint32_t cycles = (uint32_t)(stop - start);

        // print out the cycles consumed; every argument is converted to
        // `int` so it matches the `%d` conversions (start/stop show their
        // low 32 bits only)
        printf("len = %d, %d %d cyccnt = %d, cycles/byte = %d\n", (int)len, (int)(uint32_t)stop,
               (int)(uint32_t)start, (int)cycles, (int)(cycles / (uint32_t)len));
    }
}
|
||||
|
||||
void testPhymem(void)
|
||||
{
|
||||
printf("Testing memory PHY\n");
|
||||
@ -47,7 +70,8 @@ void testPhymem(void)
|
||||
assert(freePageStatFree == freePageStatBegin);
|
||||
assert(usedPageStatFree == usedPageStatBegin);
|
||||
|
||||
assertmsg((page = (struct phyMemDesc *)allocPhyPage(1)) != NULL, "Cannot allocate memory\n");
|
||||
assertmsg((page = (struct phyMemDesc *)allocPhyPage(1)) != NULL,
|
||||
"Cannot allocate memory\n");
|
||||
unrefPhyPage((ulong)page);
|
||||
}
|
||||
|
||||
@ -137,7 +161,8 @@ static void testPaging(void)
|
||||
}
|
||||
printf("%d pages freed\n", freeCount);
|
||||
|
||||
assertmsg((page = (struct phyMemDesc *)allocPhyPage(1)) != NULL, "Cannot allocate memory\n");
|
||||
assertmsg((page = (struct phyMemDesc *)allocPhyPage(1)) != NULL,
|
||||
"Cannot allocate memory\n");
|
||||
unrefPhyPage((ulong)page);
|
||||
}
|
||||
|
||||
@ -315,6 +340,7 @@ void testKthread()
|
||||
|
||||
void run_test(void)
|
||||
{
|
||||
testMemcpyPerf();
|
||||
{
|
||||
int test = 1000;
|
||||
long long int test64 = 0x100000000;
|
||||
|
Loading…
x
Reference in New Issue
Block a user