From 5938aa9cfb2e5390d38058b8b370d9319228860d Mon Sep 17 00:00:00 2001 From: Julian Barathieu Date: Wed, 2 Jan 2019 14:51:40 +0100 Subject: [PATCH] Better mem*() stuff --- src/kaleid/common/convert.c | 13 ---- src/kaleid/common/memory.c | 137 +++++++++++++++++++++++------------ src/kaleid/common/memsub.c | 37 ---------- src/kaleid/include/kallims.h | 16 ++++ 4 files changed, 105 insertions(+), 98 deletions(-) delete mode 100644 src/kaleid/common/memsub.c diff --git a/src/kaleid/common/convert.c b/src/kaleid/common/convert.c index 19c9bbe..c18765c 100644 --- a/src/kaleid/common/convert.c +++ b/src/kaleid/common/convert.c @@ -14,19 +14,6 @@ // static const char digits[36] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - -#if defined(_NEED_UTOA) || defined(_NEED_ULTOA) -#define _S unsigned -#else -#define _S -#endif - -#if defined (_NEED_ITOA) || defined(_NEED_UTOA) -#define _T int -#else -#define _T long -#endif - // // Integer to string in any base between 2 and 36 (included) // diff --git a/src/kaleid/common/memory.c b/src/kaleid/common/memory.c index 2ccf2e8..c88bd43 100644 --- a/src/kaleid/common/memory.c +++ b/src/kaleid/common/memory.c @@ -14,49 +14,34 @@ // memset() family // //------------------------------------------// -// -// Set "qwords"-many aligned qwords starting from ptr to val -// -static inline void *_memset_internal(void *ptr, ulong uval, size_t qwords) -{ - size_t n; - ulong *uptr = (ulong *)ptr; - - // aligned memory write - for (n = 0; n < qwords; n++) { - *uptr++ = uval; - } - - return ptr; -} - // // Set "bytes"-many bytes starting from ptr to val // -void *memset(void *ptr, int val, size_t bytes) +void *memsetb(void *ptr, int val, size_t bytes) { uchar *uptr = (uchar *)ptr; - const size_t qwords = bytes/QWORD_SIZE; - - // get rid of everything after the first byte - val = val & 0xFF; // deal with bytes before start of the first aligned qword while (((ulong)uptr % QWORD_ALIGN) > 0 && bytes--) { *uptr++ = (uchar)val; } - // move qword by qword - if 
(qwords) { + // we're qword-aligned now + if (bytes > QWORD_SIZE) { const ulong uval = ((ulong)val << 56) | ((ulong)val << 48) | ((ulong)val << 40) | ((ulong)val << 32) | ((ulong)val << 24) | ((ulong)val << 16) | ((ulong)val << 8) | ((ulong)val); - _memset_internal(uptr, uval, qwords); + ulong *uqptr = (ulong *)uptr; - uptr = (uchar *) ( (ulong)uptr / (qwords * QWORD_SIZE) ); - bytes %= QWORD_SIZE; + // move qword by qword + while (bytes > QWORD_SIZE) { + *uqptr++ = uval; + bytes -= QWORD_SIZE; + } + + uptr = (uchar *)(ulong)uqptr; } // deal with what's left @@ -74,10 +59,7 @@ void *memsetw(void *ptr, int val, size_t words) { ushort *uptr = (ushort *)ptr; - // get rid of everything after the first word - val = val & 0xFFFF; - - // can we do this an aligned way? + // can't we do this an aligned way? if unlikely (((ulong)uptr % WORD_ALIGN) > 0) { // no, we can't align ourselves while (words--) { @@ -93,17 +75,20 @@ void *memsetw(void *ptr, int val, size_t words) *uptr++ = (ushort)val; } - const size_t qwords = (words * WORD_SIZE)/QWORD_SIZE; - - // move qword by qword - if (qwords) { + // we're aligned for sure + if (words > QWORDS_TO_WORDS(1)) { const ulong uval = ((ulong)val << 48) | ((ulong)val << 32) | ((ulong)val << 16) | ((ulong)val); - _memset_internal(uptr, uval, qwords); + ulong *uqptr = (ulong *)uptr; + + // move qword by qword + while (words > QWORDS_TO_WORDS(1)) { + words -= QWORDS_TO_WORDS(1); + *uqptr++ = uval; + } - uptr += qwords * QWORD_SIZE / WORD_SIZE; - words %= QWORD_SIZE / WORD_SIZE; + uptr = (ushort *)(ulong)uqptr; } // deal with what's left @@ -131,23 +116,79 @@ void *memsetd(void *ptr, int val, size_t dwords) // void *memsetq(void *ptr, long val, size_t qwords) { - return _memset_internal(ptr, (ulong)val, qwords); + ulong *uptr = (ulong *)ptr; + + while (qwords--) *uptr++ = (ulong)val; + + return ptr; } // // Set "bytes"-many bytes starting from ptr to 0 +// +// WARNING +// Assume "bytes" is large, for small sizes +// use memset(ptr, 0, bytes) 
directly // void *memzero(void *ptr, size_t bytes) { - // is direct aligned access possible? (is "unlikely" good here?) - if unlikely (bytes % QWORD_SIZE && (ulong)ptr % QWORD_ALIGN) { - return _memset_internal(ptr, (ulong)0, bytes/QWORD_SIZE); - } - - if unlikely (bytes % WORD_SIZE && (ulong)ptr % WORD_ALIGN) { - return memsetw(ptr, (int)0, bytes/WORD_SIZE); - } - - return memset(ptr, 0, bytes); + return memsetb(ptr, 0, bytes); +} + + +// +// Copy "bytes"-many bytes of src to dst +// Does not deal with overlapping blocks (memmove's job) +// +void *memcpy(void *dst, const void *src, size_t bytes) +{ + const ulong *usrc = (const ulong *)src; + ulong *udst = (ulong *)dst; + + if unlikely (bytes == 0) return dst; + + // can we align them both at once? + if unlikely ((ulong)src % WORD_ALIGN == 1 + && (ulong)dst % WORD_ALIGN == 1) { + const uchar *ubsrc = (const uchar *)usrc; + uchar *ubdst = (uchar *)udst; + + *ubdst++ = *ubsrc++; + bytes--; + + udst = (ulong *)ubdst; + usrc = (ulong *)ubsrc; + } + + const ushort *uwsrc = (const ushort *)usrc; + ushort *uwdst = (ushort *)udst; + + // align either dst or src for qword access + while ((ulong)uwdst % QWORD_ALIGN > 0 + && (ulong)uwsrc % QWORD_ALIGN > 0 + && bytes > WORD_SIZE) { + + *uwdst++ = *uwsrc++; + bytes -= WORD_SIZE; + } + + udst = (ulong *)uwdst; + usrc = (ulong *)uwsrc; + + // should be most of the job + while (bytes > QWORD_SIZE) { + *udst++ = *usrc++; + bytes -= QWORD_SIZE; + } + + const uchar *ubsrc = (const uchar *)usrc; + uchar *ubdst = (uchar *)udst; + + // deal with what's left + while (bytes--) { + *ubdst ++ = *ubsrc++; + } + + return dst; } diff --git a/src/kaleid/common/memsub.c b/src/kaleid/common/memsub.c deleted file mode 100644 index 247c4d3..0000000 --- a/src/kaleid/common/memsub.c +++ /dev/null @@ -1,37 +0,0 @@ -//----------------------------------------------------------------------------// -// GNU GPL OS/K // -// // -// Authors: spectral` // -// NeoX // -// // -// Desc: mem*() functions, suboptimal 
edition // -//----------------------------------------------------------------------------// - -#include - -// -// Set "bytes"-many bytes starting from ptr to val -// -void *memset(void *ptr, int val, size_t bytes) -{ - uchar uval = val & 0xFF; - uchar *uptr = (uchar *)ptr; - - while (bytes--) *uptr++ = uval; - - return ptr; -} - -// -// Set "bytes"-many bytes starting from ptr to 0 -// -void *memzero(void *ptr, size_t bytes) -{ - uchar *uptr = (uchar *)ptr; - - while (bytes--) *uptr++ = 0; - - return ptr; -} - - diff --git a/src/kaleid/include/kallims.h b/src/kaleid/include/kallims.h index 57ba2fa..ac5b158 100644 --- a/src/kaleid/include/kallims.h +++ b/src/kaleid/include/kallims.h @@ -41,6 +41,22 @@ # define LONG_BIT QWORD_BIT #endif +#ifndef DATA_SHIFTS_BLOCK +#define DATA_SHIFTS_BLOCK +# define BYTES_TO_WORDS(B) ((B) >> 1) +# define BYTES_TO_DWORDS(B) ((B) >> 2) +# define BYTES_TO_QWORDS(B) ((B) >> 3) +# define WORDS_TO_BYTES(W) ((W) << 1) +# define WORDS_TO_DWORDS(W) ((W) >> 1) +# define WORDS_TO_QWORDS(W) ((W) >> 2) +# define DWORDS_TO_BYTES(D) ((D) << 2) +# define DWORDS_TO_WORDS(D) ((D) << 1) +# define DWORDS_TO_QWORDS(D) ((D) >> 1) +# define QWORDS_TO_BYTES(Q) ((Q) << 3) +# define QWORDS_TO_WORDS(Q) ((Q) << 2) +# define QWORDS_TO_DWORDS(Q) ((Q) << 1) +#endif + //------------------------------------------// // Numeric data limits // //------------------------------------------//