// src/runtime_strings.js - external/github.com/emscripten-core/emscripten - Git at Google

 /**
  * @license
  * Copyright 2019 The Emscripten Authors
  * SPDX-License-Identifier: MIT
  */

 // runtime_strings.js: String related runtime functions that are part of both
 // MINIMAL_RUNTIME and regular runtime.

 // UTF8Decoder is the shared TextDecoder instance used by the UTF8 decoding
 // functions in this file. How it is created depends on the TEXTDECODER build
 // setting; for any other value of TEXTDECODER no UTF8Decoder is declared here.
 #if TEXTDECODER == 2
 // TEXTDECODER == 2: the decoder is constructed unconditionally, with no check
 // that the environment actually provides TextDecoder.
 var UTF8Decoder = new TextDecoder('utf8');
 #elif TEXTDECODER == 1
 // TEXTDECODER == 1: the decoder is constructed only when the environment
 // defines TextDecoder; otherwise UTF8Decoder is left undefined and users of it
 // must check for that.
 var UTF8Decoder = typeof TextDecoder != 'undefined' ? new TextDecoder('utf8') : undefined;
 #endif

 /**
  * Given a pointer 'idx' to a null-terminated UTF8-encoded string in the given
  * array that contains uint8 values, returns a copy of that string as a
  * Javascript String object.
  *
  * heapOrArray is either a regular array, or a JavaScript typed array view.
  *
  * Which decoder runs depends on the TEXTDECODER build setting:
  *  - TEXTDECODER == 2: the bytes are always decoded by UTF8Decoder.
  *  - TEXTDECODER enabled but != 2: UTF8Decoder is used only when the string is
  *    longer than 16 bytes, heapOrArray exposes a .buffer and UTF8Decoder was
  *    actually created; everything else goes through the decoding loop in this
  *    function.
  *  - TEXTDECODER disabled: the decoding loop in this function is always used.
  *
  * @param {number} idx - Index of the first byte of the string in heapOrArray.
  * @param {number=} maxBytesToRead - Optional upper bound on the number of bytes
  *   to scan. If omitted, the string is read up to the first 0 byte.
  * @return {string}
  */
 function UTF8ArrayToString(heapOrArray, idx, maxBytesToRead) {
 #if CAN_ADDRESS_2GB
   // Reinterpret idx as an unsigned 32-bit integer.
   idx >>>= 0;
 #endif
   // When maxBytesToRead is undefined this evaluates to NaN. All bound checks
   // below are written as !(x >= endIdx), which stays true for NaN, so a
   // missing maxBytesToRead never ends the scan.
   var endIdx = idx + maxBytesToRead;
 #if TEXTDECODER
   var endPtr = idx;
   // TextDecoder needs to know the byte length in advance, it doesn't stop on
   // null terminator by itself.  Also, use the length info to avoid running tiny
   // strings through TextDecoder, since .subarray() allocates garbage.
   // (As a tiny code save trick, compare endPtr against endIdx using a negation,
   // so that undefined means Infinity)
   // After this loop [idx, endPtr) is the byte range of the string.
   while (heapOrArray[endPtr] && !(endPtr >= endIdx)) ++endPtr;
 #endif // TEXTDECODER

 #if TEXTDECODER == 2
   // Inputs that expose a .buffer are decoded through the view expression
   // generated by the getUnsharedTextDecoderView macro (defined outside this
   // file); anything else is sliced and copied into a fresh Uint8Array first.
   return UTF8Decoder.decode(heapOrArray.buffer ? {{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}} : new Uint8Array(heapOrArray.slice(idx, endPtr)));
 #else // TEXTDECODER == 2
 #if TEXTDECODER
   // Only hand the string to TextDecoder when it is longer than 16 bytes, the
   // input exposes a .buffer, and a decoder instance exists. Otherwise fall
   // through to the decoding loop below.
   if (endPtr - idx > 16 && heapOrArray.buffer && UTF8Decoder) {
     return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}});
   }
 #endif // TEXTDECODER
   // Decoding loop: consumes one UTF8 sequence per iteration and appends the
   // resulting UTF-16 code unit(s) to str.
   var str = '';
 #if TEXTDECODER
   // If building with TextDecoder, we have already computed the string length
   // above, so test loop end condition against that
   while (idx < endPtr) {
 #else
   while (!(idx >= endIdx)) {
 #endif
     // For UTF8 byte structure, see:
     // http://en.wikipedia.org/wiki/UTF-8#Description
     // https://www.ietf.org/rfc/rfc2279.txt
     // https://tools.ietf.org/html/rfc3629
     var u0 = heapOrArray[idx++];
 #if !TEXTDECODER
     // If not building with TextDecoder enabled, we don't know the string
     // length, so scan for \0 byte.
     // If building with TextDecoder, we know exactly at what byte index the
     // string ends, so checking for nulls here would be redundant.
     if (!u0) return str;
 #endif
     // Lead byte 0xxxxxxx: a single-byte sequence.
     if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
     // Continuation bytes contribute their low 6 bits. They are consumed
     // without re-checking the end bound, a 0 byte, or the 10xxxxxx marker, so
     // a truncated sequence at the end of the string reads past that end.
     var u1 = heapOrArray[idx++] & 63;
     // Lead byte 110xxxxx: a two-byte sequence.
     if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
     var u2 = heapOrArray[idx++] & 63;
     if ((u0 & 0xF0) == 0xE0) {
       // Lead byte 1110xxxx: a three-byte sequence.
       u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
     } else {
 #if ASSERTIONS
       // Any lead byte other than 11110xxx is reported through warnOnce, but
       // is still decoded below as if it started a four-byte sequence.
       if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte ' + ptrToString(u0) + ' encountered when deserializing a UTF-8 string in wasm memory to a JS string!');
 #endif
       // Four-byte sequence: the fourth byte is read here.
       u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (heapOrArray[idx++] & 63);
     }

     if (u0 < 0x10000) {
       str += String.fromCharCode(u0);
     } else {
       // Values of 0x10000 and above are emitted as a UTF-16 surrogate pair.
       var ch = u0 - 0x10000;
       str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF));
     }
   }
   return str;
 #endif // TEXTDECODER == 2
 }

 /**
  * Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the
  * emscripten HEAP, returns a copy of that string as a Javascript String object.
  *
  * @param {number} ptr
  * @param {number=} maxBytesToRead - An optional length that specifies the
  *   maximum number of bytes to read. You can omit this parameter to scan the
  *   string until the first \0 byte. If maxBytesToRead is passed, and the string
  *   at [ptr, ptr+maxBytesToReadr[ contains a null byte in the middle, then the
  *   string will cut short at that byte index (i.e. maxBytesToRead will not
  *   produce a string of exact length [ptr, ptr+maxBytesToRead[) N.B. mixing
  *   frequent uses of UTF8ToString() with and without maxBytesToRead may throw
  *   JS JIT optimizations off, so it is worth to consider consistently using one
  * @return {string}
  */
 function UTF8ToString(ptr, maxBytesToRead) {
 #if ASSERTIONS
   assert(typeof ptr == 'number');
 #endif
 #if CAN_ADDRESS_2GB
   ptr >>>= 0;
 #endif
 #if TEXTDECODER == 2
   if (!ptr) return '';
   var maxPtr = ptr + maxBytesToRead;
   for (var end = ptr; !(end >= maxPtr) && HEAPU8[end];) ++end;
   return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('HEAPU8', 'ptr', 'end') }}});
 #else
   return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
 #endif
 }

 /**
  * Copies the given Javascript String object 'str' to the given byte array at
  * address 'outIdx', encoded in UTF8 form and null-terminated. The copy will
  * require at most str.length*4+1 bytes of space in the HEAP.  Use the function
  * lengthBytesUTF8 to compute the exact number of bytes (excluding null
  * terminator) that this function will write.
  *
  * @param {string} str - The Javascript string to copy.
  * @param {ArrayBufferView|Array<number>} heap - The array to copy to. Each
  *                                               index in this array is assumed
  *                                               to be one 8-byte element.
  * @param {number} outIdx - The starting offset in the array to begin the copying.
  * @param {number} maxBytesToWrite - The maximum number of bytes this function
  *                                   can write to the array.  This count should
  *                                   include the null terminator, i.e. if
  *                                   maxBytesToWrite=1, only the null terminator
  *                                   will be written and nothing else.
  *                                   maxBytesToWrite=0 does not write any bytes
  *                                   to the output, not even the null
  *                                   terminator.
  * @return {number} The number of bytes written, EXCLUDING the null terminator.
  */
 function stringToUTF8Array(str, heap, outIdx, maxBytesToWrite) {
 #if CAN_ADDRESS_2GB
   outIdx >>>= 0;
 #endif
   // Parameter maxBytesToWrite is not optional. Negative values, 0, null,
   // undefined and false each don't write out any bytes.
   if (!(maxBytesToWrite > 0))
     return 0;

   var startIdx = outIdx;
   var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
   for (var i = 0; i < str.length; ++i) {
     // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
     // unit, not a Unicode code point of the character! So decode
     // UTF16->UTF32->UTF8.
     // See http://unicode.org/faq/utf_bom.html#utf16-3
     // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description
     // and https://www.ietf.org/rfc/rfc2279.txt
     // and https://tools.ietf.org/html/rfc3629
     var u = str.charCodeAt(i); // possibly a lead surrogate
     if (u >= 0xD800 && u <= 0xDFFF) {
       var u1 = str.charCodeAt(++i);
       u = 0x10000 + ((u & 0x3FF) << 10) | (u1 & 0x3FF);
     }
     if (u <= 0x7F) {
       if (outIdx >= endIdx) break;
       heap[outIdx++] = u;
     } else if (u <= 0x7FF) {
       if (outIdx + 1 >= endIdx) break;
       heap[outIdx++] = 0xC0 | (u >> 6);
       heap[outIdx++] = 0x80 | (u & 63);
     } else if (u <= 0xFFFF) {
       if (outIdx + 2 >= endIdx) break;
       heap[outIdx++] = 0xE0 | (u >> 12);
       heap[outIdx++] = 0x80 | ((u >> 6) & 63);
       heap[outIdx++] = 0x80 | (u & 63);
     } else {
       if (outIdx + 3 >= endIdx) break;
 #if ASSERTIONS
       if (u > 0x10FFFF) warnOnce('Invalid Unicode code point ' + ptrToString(u) + ' encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x10FFFF).');
 #endif
       heap[outIdx++] = 0xF0 | (u >> 18);
       heap[outIdx++] = 0x80 | ((u >> 12) & 63);
       heap[outIdx++] = 0x80 | ((u >> 6) & 63);
       heap[outIdx++] = 0x80 | (u & 63);
     }
   }
   // Null-terminate the pointer to the buffer.
   heap[outIdx] = 0;
   return outIdx - startIdx;
 }

 /**
  * Copies the given Javascript String object 'str' to the emscripten HEAP at
  * address 'outPtr', null-terminated and encoded in UTF8 form. The copy will
  * require at most str.length*4+1 bytes of space in the HEAP.
  * Use the function lengthBytesUTF8 to compute the exact number of bytes
  * (excluding null terminator) that this function will write.
  *
  * This is a thin wrapper: it forwards to stringToUTF8Array, passing the heap
  * and offset arguments generated by the heapAndOffset macro for HEAPU8 and
  * 'outPtr', so the meaning of maxBytesToWrite and the truncation behavior are
  * those of stringToUTF8Array.
  *
  * Parameters:
  *   str             - The Javascript string to copy.
  *   outPtr          - Destination address in the HEAP.
  *   maxBytesToWrite - Length of the output buffer; forwarded unchanged. When
  *                     built with ASSERTIONS, a non-number value fails an
  *                     assert.
  *
  * @return {number} The number of bytes written, EXCLUDING the null terminator.
  */
 function stringToUTF8(str, outPtr, maxBytesToWrite) {
 #if ASSERTIONS
   // Catch callers that forgot to pass the output buffer length.
   assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
 #endif
   // heapAndOffset is a build-time macro (defined outside this file) that
   // expands to the heap/offset arguments expected by stringToUTF8Array.
   return stringToUTF8Array(str, {{{ heapAndOffset('HEAPU8', 'outPtr') }}}, maxBytesToWrite);
 }

 /**
  * Computes how many bytes the given Javascript string occupies once encoded
  * as a UTF8 byte array, EXCLUDING the null terminator byte.
  *
  * @param {string} str - JavaScript string to operate on
  * @return {number} Length, in bytes, of the UTF8 encoded string.
  */
 function lengthBytesUTF8(str) {
   var byteCount = 0;
   var pos = 0;
   while (pos < str.length) {
     // charCodeAt yields a UTF-16 code unit, not a Unicode code point, so the
     // sizes below are derived per code unit (UTF16->UTF32->UTF8).
     // See http://unicode.org/faq/utf_bom.html#utf16-3
     var unit = str.charCodeAt(pos++);
     if (unit < 0x80) {
       byteCount += 1;
     } else if (unit < 0x800) {
       byteCount += 2;
     } else if (unit < 0xD800 || unit > 0xDFFF) {
       byteCount += 3;
     } else {
       // Any surrogate is counted as one 4-byte sequence together with the
       // code unit that follows it, which is therefore skipped.
       byteCount += 4;
       pos++;
     }
   }
   return byteCount;
 }
	/**
	* @license
	* Copyright 2019 The Emscripten Authors
	* SPDX-License-Identifier: MIT
	*/

	// runtime_strings.js: String related runtime functions that are part of both
	// MINIMAL_RUNTIME and regular runtime.

	// UTF8Decoder is the shared TextDecoder instance used by the UTF8 decoding
	// functions in this file. How it is created depends on the TEXTDECODER build
	// setting; for any other value of TEXTDECODER no UTF8Decoder is declared here.
	#if TEXTDECODER == 2
	// TEXTDECODER == 2: the decoder is constructed unconditionally, with no check
	// that the environment actually provides TextDecoder.
	var UTF8Decoder = new TextDecoder('utf8');
	#elif TEXTDECODER == 1
	// TEXTDECODER == 1: the decoder is constructed only when the environment
	// defines TextDecoder; otherwise UTF8Decoder is left undefined and users of it
	// must check for that.
	var UTF8Decoder = typeof TextDecoder != 'undefined' ? new TextDecoder('utf8') : undefined;
	#endif

	/**
	* Given a pointer 'idx' to a null-terminated UTF8-encoded string in the given
	* array that contains uint8 values, returns a copy of that string as a
	* Javascript String object.
	* heapOrArray is either a regular array, or a JavaScript typed array view.
	* @param {number} idx
	* @param {number=} maxBytesToRead
	* @return {string}
	*/
	function UTF8ArrayToString(heapOrArray, idx, maxBytesToRead) {
	#if CAN_ADDRESS_2GB
	idx >>>= 0;
	#endif
	var endIdx = idx + maxBytesToRead;
	#if TEXTDECODER
	var endPtr = idx;
	// TextDecoder needs to know the byte length in advance, it doesn't stop on
	// null terminator by itself. Also, use the length info to avoid running tiny
	// strings through TextDecoder, since .subarray() allocates garbage.
	// (As a tiny code save trick, compare endPtr against endIdx using a negation,
	// so that undefined means Infinity)
	while (heapOrArray[endPtr] && !(endPtr >= endIdx)) ++endPtr;
	#endif // TEXTDECODER

	#if TEXTDECODER == 2
	return UTF8Decoder.decode(heapOrArray.buffer ? {{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}} : new Uint8Array(heapOrArray.slice(idx, endPtr)));
	#else // TEXTDECODER == 2
	#if TEXTDECODER
	if (endPtr - idx > 16 && heapOrArray.buffer && UTF8Decoder) {
	return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}});
	}
	#endif // TEXTDECODER
	var str = '';
	#if TEXTDECODER
	// If building with TextDecoder, we have already computed the string length
	// above, so test loop end condition against that
	while (idx < endPtr) {
	#else
	while (!(idx >= endIdx)) {
	#endif
	// For UTF8 byte structure, see:
	// http://en.wikipedia.org/wiki/UTF-8#Description
	// https://www.ietf.org/rfc/rfc2279.txt
	// https://tools.ietf.org/html/rfc3629
	var u0 = heapOrArray[idx++];
	#if !TEXTDECODER
	// If not building with TextDecoder enabled, we don't know the string
	// length, so scan for \0 byte.
	// If building with TextDecoder, we know exactly at what byte index the
	// string ends, so checking for nulls here would be redundant.
	if (!u0) return str;
	#endif
	if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
	var u1 = heapOrArray[idx++] & 63;
	if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) \| u1); continue; }
	var u2 = heapOrArray[idx++] & 63;
	if ((u0 & 0xF0) == 0xE0) {
	u0 = ((u0 & 15) << 12) \| (u1 << 6) \| u2;
	} else {
	#if ASSERTIONS
	if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte ' + ptrToString(u0) + ' encountered when deserializing a UTF-8 string in wasm memory to a JS string!');
	#endif
	u0 = ((u0 & 7) << 18) \| (u1 << 12) \| (u2 << 6) \| (heapOrArray[idx++] & 63);
	}

	if (u0 < 0x10000) {
	str += String.fromCharCode(u0);
	} else {
	var ch = u0 - 0x10000;
	str += String.fromCharCode(0xD800 \| (ch >> 10), 0xDC00 \| (ch & 0x3FF));
	}
	}
	return str;
	#endif // TEXTDECODER == 2
	}

	/**
	* Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the
	* emscripten HEAP, returns a copy of that string as a Javascript String object.
	*
	* @param {number} ptr
	* @param {number=} maxBytesToRead - An optional length that specifies the
	* maximum number of bytes to read. You can omit this parameter to scan the
	* string until the first \0 byte. If maxBytesToRead is passed, and the string
	* at [ptr, ptr+maxBytesToReadr[ contains a null byte in the middle, then the
	* string will cut short at that byte index (i.e. maxBytesToRead will not
	* produce a string of exact length [ptr, ptr+maxBytesToRead[) N.B. mixing
	* frequent uses of UTF8ToString() with and without maxBytesToRead may throw
	* JS JIT optimizations off, so it is worth to consider consistently using one
	* @return {string}
	*/
	function UTF8ToString(ptr, maxBytesToRead) {
	#if ASSERTIONS
	assert(typeof ptr == 'number');
	#endif
	#if CAN_ADDRESS_2GB
	ptr >>>= 0;
	#endif
	#if TEXTDECODER == 2
	if (!ptr) return '';
	var maxPtr = ptr + maxBytesToRead;
	for (var end = ptr; !(end >= maxPtr) && HEAPU8[end];) ++end;
	return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('HEAPU8', 'ptr', 'end') }}});
	#else
	return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
	#endif
	}

	/**
	* Copies the given Javascript String object 'str' to the given byte array at
	* address 'outIdx', encoded in UTF8 form and null-terminated. The copy will
	* require at most str.length*4+1 bytes of space in the HEAP. Use the function
	* lengthBytesUTF8 to compute the exact number of bytes (excluding null
	* terminator) that this function will write.
	*
	* @param {string} str - The Javascript string to copy.
	* @param {ArrayBufferView\|Array<number>} heap - The array to copy to. Each
	* index in this array is assumed
	* to be one 8-byte element.
	* @param {number} outIdx - The starting offset in the array to begin the copying.
	* @param {number} maxBytesToWrite - The maximum number of bytes this function
	* can write to the array. This count should
	* include the null terminator, i.e. if
	* maxBytesToWrite=1, only the null terminator
	* will be written and nothing else.
	* maxBytesToWrite=0 does not write any bytes
	* to the output, not even the null
	* terminator.
	* @return {number} The number of bytes written, EXCLUDING the null terminator.
	*/
	function stringToUTF8Array(str, heap, outIdx, maxBytesToWrite) {
	#if CAN_ADDRESS_2GB
	outIdx >>>= 0;
	#endif
	// Parameter maxBytesToWrite is not optional. Negative values, 0, null,
	// undefined and false each don't write out any bytes.
	if (!(maxBytesToWrite > 0))
	return 0;

	var startIdx = outIdx;
	var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
	for (var i = 0; i < str.length; ++i) {
	// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
	// unit, not a Unicode code point of the character! So decode
	// UTF16->UTF32->UTF8.
	// See http://unicode.org/faq/utf_bom.html#utf16-3
	// For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description
	// and https://www.ietf.org/rfc/rfc2279.txt
	// and https://tools.ietf.org/html/rfc3629
	var u = str.charCodeAt(i); // possibly a lead surrogate
	if (u >= 0xD800 && u <= 0xDFFF) {
	var u1 = str.charCodeAt(++i);
	u = 0x10000 + ((u & 0x3FF) << 10) \| (u1 & 0x3FF);
	}
	if (u <= 0x7F) {
	if (outIdx >= endIdx) break;
	heap[outIdx++] = u;
	} else if (u <= 0x7FF) {
	if (outIdx + 1 >= endIdx) break;
	heap[outIdx++] = 0xC0 \| (u >> 6);
	heap[outIdx++] = 0x80 \| (u & 63);
	} else if (u <= 0xFFFF) {
	if (outIdx + 2 >= endIdx) break;
	heap[outIdx++] = 0xE0 \| (u >> 12);
	heap[outIdx++] = 0x80 \| ((u >> 6) & 63);
	heap[outIdx++] = 0x80 \| (u & 63);
	} else {
	if (outIdx + 3 >= endIdx) break;
	#if ASSERTIONS
	if (u > 0x10FFFF) warnOnce('Invalid Unicode code point ' + ptrToString(u) + ' encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x10FFFF).');
	#endif
	heap[outIdx++] = 0xF0 \| (u >> 18);
	heap[outIdx++] = 0x80 \| ((u >> 12) & 63);
	heap[outIdx++] = 0x80 \| ((u >> 6) & 63);
	heap[outIdx++] = 0x80 \| (u & 63);
	}
	}
	// Null-terminate the pointer to the buffer.
	heap[outIdx] = 0;
	return outIdx - startIdx;
	}

	/**
	* Copies the given Javascript String object 'str' to the emscripten HEAP at
	* address 'outPtr', null-terminated and encoded in UTF8 form. The copy will
	* require at most str.length*4+1 bytes of space in the HEAP.
	* Use the function lengthBytesUTF8 to compute the exact number of bytes
	* (excluding null terminator) that this function will write.
	*
	* This is a thin wrapper: it forwards to stringToUTF8Array, passing the heap
	* and offset arguments generated by the heapAndOffset macro for HEAPU8 and
	* 'outPtr', so the meaning of maxBytesToWrite and the truncation behavior are
	* those of stringToUTF8Array.
	*
	* Parameters:
	*   str             - The Javascript string to copy.
	*   outPtr          - Destination address in the HEAP.
	*   maxBytesToWrite - Length of the output buffer; forwarded unchanged. When
	*                     built with ASSERTIONS, a non-number value fails an
	*                     assert.
	*
	* @return {number} The number of bytes written, EXCLUDING the null terminator.
	*/
	function stringToUTF8(str, outPtr, maxBytesToWrite) {
	#if ASSERTIONS
	// Catch callers that forgot to pass the output buffer length.
	assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
	#endif
	// heapAndOffset is a build-time macro (defined outside this file) that
	// expands to the heap/offset arguments expected by stringToUTF8Array.
	return stringToUTF8Array(str, {{{ heapAndOffset('HEAPU8', 'outPtr') }}}, maxBytesToWrite);
	}

	/**
	 * Computes how many bytes the given Javascript string occupies once encoded
	 * as a UTF8 byte array, EXCLUDING the null terminator byte.
	 *
	 * @param {string} str - JavaScript string to operate on
	 * @return {number} Length, in bytes, of the UTF8 encoded string.
	 */
	function lengthBytesUTF8(str) {
		var bytes = 0;
		for (var k = 0, unitCount = str.length; k < unitCount; k++) {
			// charCodeAt yields a UTF-16 code unit, not a Unicode code point, so the
			// sizes below are derived per code unit (UTF16->UTF32->UTF8).
			// See http://unicode.org/faq/utf_bom.html#utf16-3
			var code = str.charCodeAt(k);
			var isSurrogate = code >= 0xD800 && code <= 0xDFFF;
			if (isSurrogate) {
				// Any surrogate is counted as one 4-byte sequence together with the
				// code unit that follows it, which is therefore skipped.
				bytes += 4;
				k++;
				continue;
			}
			bytes += code <= 0x7F ? 1 : (code <= 0x7FF ? 2 : 3);
		}
		return bytes;
	}