areaIntegration #1
62
core/klibc.c
62
core/klibc.c
@ -16,14 +16,66 @@ int memcmp(const void *aptr, const void *bptr, size_t size)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *memcpy(void *dst, const void *src, size_t n)
|
// Inspired by https://interrupt.memfault.com/blog/memcpy-newlib-nano
|
||||||
|
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
|
||||||
|
#define UNALIGNED(X, Y) \
|
||||||
|
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
|
||||||
|
|
||||||
|
/* How many bytes are copied each iteration of the 4X unrolled loop. */
|
||||||
|
#define BIGBLOCKSIZE (sizeof (long) << 2)
|
||||||
|
|
||||||
|
/* How many bytes are copied each iteration of the word copy loop. */
|
||||||
|
#define LITTLEBLOCKSIZE (sizeof (long))
|
||||||
|
|
||||||
|
/* Threshold for punting to the byte copier. */
|
||||||
|
#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
|
||||||
|
|
||||||
|
void *memcpy(void *dst0, const void *src0, size_t len0)
|
||||||
{
|
{
|
||||||
char *dstChar = dst;
|
#if 0
|
||||||
const char *srcChar = src;
|
char *dstChar = dst0;
|
||||||
for (size_t i = 0; i < n; i++) {
|
const char *srcChar = src0;
|
||||||
|
for (size_t i = 0; i < len0; i++) {
|
||||||
*(dstChar++) = *(srcChar++);
|
*(dstChar++) = *(srcChar++);
|
||||||
}
|
}
|
||||||
return dst;
|
return dst0;
|
||||||
|
#else
|
||||||
|
char *dst = dst0;
|
||||||
|
const char *src = src0;
|
||||||
|
long *aligned_dst;
|
||||||
|
const long *aligned_src;
|
||||||
|
|
||||||
|
/* If the size is small, or either SRC or DST is unaligned,
|
||||||
|
then punt into the byte copy loop. This should be rare. */
|
||||||
|
if (!TOO_SMALL(len0) && !UNALIGNED(src, dst)) {
|
||||||
|
aligned_dst = (long *)dst;
|
||||||
|
aligned_src = (long *)src;
|
||||||
|
|
||||||
|
/* Copy 4X long words at a time if possible. */
|
||||||
|
while (len0 >= BIGBLOCKSIZE) {
|
||||||
|
*aligned_dst++ = *aligned_src++;
|
||||||
|
*aligned_dst++ = *aligned_src++;
|
||||||
|
*aligned_dst++ = *aligned_src++;
|
||||||
|
*aligned_dst++ = *aligned_src++;
|
||||||
|
len0 -= BIGBLOCKSIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy one long word at a time if possible. */
|
||||||
|
while (len0 >= LITTLEBLOCKSIZE) {
|
||||||
|
*aligned_dst++ = *aligned_src++;
|
||||||
|
len0 -= LITTLEBLOCKSIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pick up any residual with a byte copier. */
|
||||||
|
dst = (char *)aligned_dst;
|
||||||
|
src = (char *)aligned_src;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (len0--)
|
||||||
|
*dst++ = *src++;
|
||||||
|
|
||||||
|
return dst0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void *memset(void *src, int c, size_t n)
|
void *memset(void *src, int c, size_t n)
|
||||||
|
@ -27,3 +27,10 @@ unsigned long usecs_to_jiffies(const unsigned int u)
|
|||||||
// This could overflow
|
// This could overflow
|
||||||
return (u * HZ) / 1000000L;
|
return (u * HZ) / 1000000L;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include <x86intrin.h>
|
||||||
|
inline uint64_t read_cycle_counter()
|
||||||
|
{
|
||||||
|
uint64_t tsc = __rdtsc();
|
||||||
|
return tsc;
|
||||||
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#define HZ 100
|
#define HZ 100
|
||||||
/*
|
/*
|
||||||
@ -50,3 +51,5 @@ unsigned int jiffies_to_msecs(const unsigned long j);
|
|||||||
unsigned int jiffies_to_usecs(const unsigned long j);
|
unsigned int jiffies_to_usecs(const unsigned long j);
|
||||||
unsigned long msecs_to_jiffies(const unsigned int m);
|
unsigned long msecs_to_jiffies(const unsigned int m);
|
||||||
unsigned long usecs_to_jiffies(const unsigned int u);
|
unsigned long usecs_to_jiffies(const unsigned int u);
|
||||||
|
|
||||||
|
uint64_t read_cycle_counter();
|
||||||
|
30
tests/test.c
30
tests/test.c
@ -11,6 +11,29 @@
|
|||||||
#include "synchro.h"
|
#include "synchro.h"
|
||||||
#include "time.h"
|
#include "time.h"
|
||||||
|
|
||||||
|
void testMemcpyPerf()
|
||||||
|
{
|
||||||
|
struct test_struct {
|
||||||
|
char data[4096];
|
||||||
|
};
|
||||||
|
// instantiate 2 structs. for our purposes, we don't care what data is in
|
||||||
|
// there. set them to `volatile` so the compiler won't optimize away what we
|
||||||
|
// do with them
|
||||||
|
volatile struct test_struct dest, source;
|
||||||
|
|
||||||
|
printf("Test Memcpy perf\n");
|
||||||
|
// run through powers-of-two memcpy's, printing stats for each test
|
||||||
|
for (size_t len = 1; len <= sizeof(dest); len <<= 1) {
|
||||||
|
uint32_t start = read_cycle_counter(); // << Start count
|
||||||
|
memcpy((void *)&dest, (void *)&source, len);
|
||||||
|
uint32_t stop = read_cycle_counter(); // << Stop count
|
||||||
|
|
||||||
|
// print out the cycles consumed
|
||||||
|
printf("len = %d, %d %d cyccnt = %d, cycles/byte = %d\n", (uint32_t)len, stop, start,
|
||||||
|
stop - start, (stop - start) / len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void testPhymem(void)
|
void testPhymem(void)
|
||||||
{
|
{
|
||||||
printf("Testing memory PHY\n");
|
printf("Testing memory PHY\n");
|
||||||
@ -47,7 +70,8 @@ void testPhymem(void)
|
|||||||
assert(freePageStatFree == freePageStatBegin);
|
assert(freePageStatFree == freePageStatBegin);
|
||||||
assert(usedPageStatFree == usedPageStatBegin);
|
assert(usedPageStatFree == usedPageStatBegin);
|
||||||
|
|
||||||
assertmsg((page = (struct phyMemDesc *)allocPhyPage(1)) != NULL, "Cannot allocate memory\n");
|
assertmsg((page = (struct phyMemDesc *)allocPhyPage(1)) != NULL,
|
||||||
|
"Cannot allocate memory\n");
|
||||||
unrefPhyPage((ulong)page);
|
unrefPhyPage((ulong)page);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,7 +161,8 @@ static void testPaging(void)
|
|||||||
}
|
}
|
||||||
printf("%d pages freed\n", freeCount);
|
printf("%d pages freed\n", freeCount);
|
||||||
|
|
||||||
assertmsg((page = (struct phyMemDesc *)allocPhyPage(1)) != NULL, "Cannot allocate memory\n");
|
assertmsg((page = (struct phyMemDesc *)allocPhyPage(1)) != NULL,
|
||||||
|
"Cannot allocate memory\n");
|
||||||
unrefPhyPage((ulong)page);
|
unrefPhyPage((ulong)page);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -315,6 +340,7 @@ void testKthread()
|
|||||||
|
|
||||||
void run_test(void)
|
void run_test(void)
|
||||||
{
|
{
|
||||||
|
testMemcpyPerf();
|
||||||
{
|
{
|
||||||
int test = 1000;
|
int test = 1000;
|
||||||
long long int test64 = 0x100000000;
|
long long int test64 = 0x100000000;
|
||||||
|
Loading…
Reference in New Issue
Block a user