/*
 * A simple memcpy optimized for wasm.
 */

#include <stdint.h>
#include <string.h>

// An external JS implementation that is efficient for very large copies, using
// HEAPU8.set().
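// (Roughly, on the JS side: HEAPU8.set(HEAPU8.subarray(src, src + n), dest) —
// a sketch of the approach, not the exact library code.)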
extern void *emscripten_memcpy_big(void *restrict dest, const void *restrict src, size_t n);

// XXX EMSCRIPTEN ASAN: build an uninstrumented version of memcpy.
// The define renames the definition below to emscripten_builtin_memcpy and
// marks it no_sanitize("address"), leaving the public memcpy symbol free for
// ASan's own instrumented interceptor.
#if defined(__EMSCRIPTEN__) && defined(__has_feature)
#if __has_feature(address_sanitizer)
#define memcpy __attribute__((no_sanitize("address"))) emscripten_builtin_memcpy
#endif
#endif

void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
  unsigned char *d = dest;
  const unsigned char *s = src;

  unsigned char *aligned_d_end;
  unsigned char *block_aligned_d_end;
  unsigned char *d_end;

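  // Very large copies are cheaper via a single JS HEAPU8.set() than via a
  // scalar wasm loop, so hand them off to emscripten_memcpy_big.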
  if (n >= 8192) {
    emscripten_memcpy_big(dest, src, n);
    return dest;
  }

  d_end = d + n;
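  // Word-at-a-time copying is only possible when dest and src share the same
  // alignment modulo 4; otherwise fall through to the byte-wise path below.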
  if ((((uintptr_t)d) & 3) == (((uintptr_t)s) & 3)) {
    // The initial unaligned < 4-byte front.
    while ((((uintptr_t)d) & 3) && d < d_end) {
      *d++ = *s++;
    }
    aligned_d_end = (unsigned char *)(((uintptr_t)d_end) & -4);
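    // aligned_d_end is d_end rounded down to a 4-byte boundary. The
    // pointer-value check below guards the subtraction aligned_d_end - 64
    // against underflow when the copy ends near the start of memory.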
    if (((uintptr_t)aligned_d_end) >= 64) {
      block_aligned_d_end = aligned_d_end - 64;
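      // Unrolled main loop: sixteen 32-bit loads/stores move 64 bytes per
      // iteration.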
      while (d <= block_aligned_d_end) {
        // TODO: we could use 64-bit ops here, but we'd need to make sure the
        // alignment is 64-bit, which might cost us
        *(((uint32_t*)d)) = *(((uint32_t*)s));
        *(((uint32_t*)d) + 1) = *(((uint32_t*)s) + 1);
        *(((uint32_t*)d) + 2) = *(((uint32_t*)s) + 2);
        *(((uint32_t*)d) + 3) = *(((uint32_t*)s) + 3);
        *(((uint32_t*)d) + 4) = *(((uint32_t*)s) + 4);
        *(((uint32_t*)d) + 5) = *(((uint32_t*)s) + 5);
        *(((uint32_t*)d) + 6) = *(((uint32_t*)s) + 6);
        *(((uint32_t*)d) + 7) = *(((uint32_t*)s) + 7);
        *(((uint32_t*)d) + 8) = *(((uint32_t*)s) + 8);
        *(((uint32_t*)d) + 9) = *(((uint32_t*)s) + 9);
        *(((uint32_t*)d) + 10) = *(((uint32_t*)s) + 10);
        *(((uint32_t*)d) + 11) = *(((uint32_t*)s) + 11);
        *(((uint32_t*)d) + 12) = *(((uint32_t*)s) + 12);
        *(((uint32_t*)d) + 13) = *(((uint32_t*)s) + 13);
        *(((uint32_t*)d) + 14) = *(((uint32_t*)s) + 14);
        *(((uint32_t*)d) + 15) = *(((uint32_t*)s) + 15);
        d += 64;
        s += 64;
      }
    }
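    // Copy any remaining whole words (fewer than 16).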
    while (d < aligned_d_end) {
      *((uint32_t *)d) = *((uint32_t *)s);
      d += 4;
      s += 4;
    }
  } else {
    // In the unaligned copy case, unroll a bit as well.
    if (((uintptr_t)d_end) >= 4) {
      aligned_d_end = d_end - 4;
      while (d <= aligned_d_end) {
        *d = *s;
        *(d + 1) = *(s + 1);
        *(d + 2) = *(s + 2);
        *(d + 3) = *(s + 3);
        d += 4;
        s += 4;
      }
    }
  }
  // The remaining unaligned < 4-byte tail.
  while (d < d_end) {
    *d++ = *s++;
  }
  return dest;
}