blob: 9ae73bf6f9f7ab2bf3182464d82e06d73484f270 [file] [log] [blame]
/**
* @license
* Copyright 2020 The Emscripten Authors
* SPDX-License-Identifier: MIT
*/
#if TEXTDECODER != 1 && TEXTDECODER != 2
#error "TEXTDECODER must be either 1 or 2"
#endif
addToLibrary({
// Source expression for the decoder object used by the UTF-8 string readers
// in this library. The TextDecoder constructor defaults to UTF-8, so no
// encoding label is passed.
// With TEXTDECODER == 2 the decoder is constructed unconditionally; otherwise
// it is constructed only when a global TextDecoder exists and evaluates to
// undefined when it does not.
#if TEXTDECODER == 2
$UTF8Decoder: "new TextDecoder()",
#else
$UTF8Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder() : undefined",
#endif
$findStringEnd: (heapOrArray, idx, maxBytesToRead, ignoreNul) => {
var maxIdx = idx + maxBytesToRead;
if (ignoreNul) return maxIdx;
// TextDecoder needs to know the byte length in advance, it doesn't stop on
// null terminator by itself.
// As a tiny code save trick, compare idx against maxIdx using a negation,
// so that maxBytesToRead=undefined/NaN means Infinity.
while (heapOrArray[idx] && !(idx >= maxIdx)) ++idx;
return idx;
},
$findStringEnd__internal: true,
$UTF8ArrayToString__docs: `
/**
* Given a pointer 'idx' to a null-terminated UTF8-encoded string in the given
* array that contains uint8 values, returns a copy of that string as a
* Javascript String object.
* heapOrArray is either a regular array, or a JavaScript typed array view.
* @param {number=} idx
* @param {number=} maxBytesToRead
* @param {boolean=} ignoreNul - If true, the function will not stop on a NUL character.
* @return {string}
*/`,
$UTF8ArrayToString__deps: [
'$UTF8Decoder', '$findStringEnd',
#if ASSERTIONS
// warnOnce is only called from the ASSERTIONS-only check in the manual decoder.
'$warnOnce',
#endif
],
// Decodes the elements heapOrArray[idx, endPtr) as UTF-8, where endPtr is
// computed by findStringEnd(). With TEXTDECODER == 2 the decoding is always
// delegated to UTF8Decoder; otherwise UTF8Decoder is used only for longer
// typed-array inputs and a hand-written decoder handles everything else.
$UTF8ArrayToString: (heapOrArray, idx = 0, maxBytesToRead, ignoreNul) => {
#if CAN_ADDRESS_2GB
// Reinterpret idx as an unsigned 32-bit value.
idx >>>= 0;
#endif
var endPtr = findStringEnd(heapOrArray, idx, maxBytesToRead, ignoreNul);
#if TEXTDECODER == 2
// Inputs that expose a .buffer go through the view macro; inputs without one
// (e.g. plain arrays) are first copied into a fresh Uint8Array.
return UTF8Decoder.decode(heapOrArray.buffer ? {{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}} : new Uint8Array(heapOrArray.slice(idx, endPtr)));
#else // TEXTDECODER == 2
// When using conditional TextDecoder, skip it for short strings as the overhead of the native call is not worth it.
if (endPtr - idx > 16 && heapOrArray.buffer && UTF8Decoder) {
return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}});
}
// Manual decoder: used for short strings, for inputs without a .buffer, and
// when UTF8Decoder is undefined.
// NOTE(review): continuation bytes are masked with 63 but never validated,
// and a multi-byte sequence that starts just before endPtr reads its
// continuation bytes from beyond endPtr.
var str = '';
while (idx < endPtr) {
// For UTF8 byte structure, see:
// http://en.wikipedia.org/wiki/UTF-8#Description
// https://www.ietf.org/rfc/rfc2279.txt
// https://tools.ietf.org/html/rfc3629
var u0 = heapOrArray[idx++];
// High bit clear: a single-byte (ASCII) character.
if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
// Low 6 bits of the first continuation byte.
var u1 = heapOrArray[idx++] & 63;
// 110xxxxx lead byte: two-byte sequence.
if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
// Low 6 bits of the second continuation byte.
var u2 = heapOrArray[idx++] & 63;
if ((u0 & 0xF0) == 0xE0) {
// 1110xxxx lead byte: three-byte sequence.
u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
} else {
#if ASSERTIONS
if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte ' + ptrToString(u0) + ' encountered when deserializing a UTF-8 string in wasm memory to a JS string!');
#endif
// Every remaining lead byte is decoded as a four-byte sequence, consuming
// one more continuation byte.
u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (heapOrArray[idx++] & 63);
}
if (u0 < 0x10000) {
str += String.fromCharCode(u0);
} else {
// Values of 0x10000 and above do not fit in one UTF-16 code unit, so they are
// emitted as a lead/trail surrogate pair.
var ch = u0 - 0x10000;
str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF));
}
}
return str;
#endif // TEXTDECODER == 2
},
$UTF8ToString__docs: `
/**
* Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the
* emscripten HEAP, returns a copy of that string as a Javascript String object.
*
* @param {number} ptr
* @param {number=} maxBytesToRead - An optional length that specifies the
* maximum number of bytes to read. You can omit this parameter to scan the
* string until the first 0 byte. If maxBytesToRead is passed, and the string
* at [ptr, ptr+maxBytesToRead) contains a null byte in the middle, then the
* string will cut short at that byte index.
* @param {boolean=} ignoreNul - If true, the function will not stop on a NUL character.
* @return {string}
*/`,
#if TEXTDECODER == 2
$UTF8ToString__deps: ['$UTF8Decoder', '$findStringEnd'],
#else
$UTF8ToString__deps: ['$UTF8ArrayToString'],
#endif
$UTF8ToString: (ptr, maxBytesToRead, ignoreNul) => {
#if ASSERTIONS
assert(typeof ptr == 'number', `UTF8ToString expects a number (got ${typeof ptr})`);
#endif
#if CAN_ADDRESS_2GB
ptr >>>= 0;
#endif
#if TEXTDECODER == 2
if (!ptr) return '';
var end = findStringEnd(HEAPU8, ptr, maxBytesToRead, ignoreNul);
return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('HEAPU8', 'ptr', 'end') }}});
#else
return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead, ignoreNul) : '';
#endif
},
/**
* Copies the given Javascript String object 'str' to the given byte array at
* address 'outIdx', encoded in UTF8 form and null-terminated. The copy will
* require at most str.length*4+1 bytes of space in the HEAP. Use the function
* lengthBytesUTF8 to compute the exact number of bytes (excluding null
* terminator) that this function will write.
*
* @param {string} str - The Javascript string to copy.
* @param {ArrayBufferView|Array<number>} heap - The array to copy to. Each
* index in this array is assumed
* to be one 8-byte element.
* @param {number} outIdx - The starting offset in the array to begin the copying.
* @param {number} maxBytesToWrite - The maximum number of bytes this function
* can write to the array. This count should
* include the null terminator, i.e. if
* maxBytesToWrite=1, only the null terminator
* will be written and nothing else.
* maxBytesToWrite=0 does not write any bytes
* to the output, not even the null
* terminator.
* @return {number} The number of bytes written, EXCLUDING the null terminator.
*/
#if ASSERTIONS
$stringToUTF8Array__deps: ['$warnOnce'],
#endif
$stringToUTF8Array: (str, heap, outIdx, maxBytesToWrite) => {
#if CAN_ADDRESS_2GB
outIdx >>>= 0;
#endif
#if ASSERTIONS
assert(typeof str === 'string', `stringToUTF8Array expects a string (got ${typeof str})`);
#endif
// Parameter maxBytesToWrite is not optional. Negative values, 0, null,
// undefined and false each don't write out any bytes.
if (!(maxBytesToWrite > 0))
return 0;
var startIdx = outIdx;
var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
for (var i = 0; i < str.length; ++i) {
// For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description
// and https://www.ietf.org/rfc/rfc2279.txt
// and https://tools.ietf.org/html/rfc3629
var u = str.codePointAt(i);
if (u <= 0x7F) {
if (outIdx >= endIdx) break;
heap[outIdx++] = u;
} else if (u <= 0x7FF) {
if (outIdx + 1 >= endIdx) break;
heap[outIdx++] = 0xC0 | (u >> 6);
heap[outIdx++] = 0x80 | (u & 63);
} else if (u <= 0xFFFF) {
if (outIdx + 2 >= endIdx) break;
heap[outIdx++] = 0xE0 | (u >> 12);
heap[outIdx++] = 0x80 | ((u >> 6) & 63);
heap[outIdx++] = 0x80 | (u & 63);
} else {
if (outIdx + 3 >= endIdx) break;
#if ASSERTIONS
if (u > 0x10FFFF) warnOnce('Invalid Unicode code point ' + ptrToString(u) + ' encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x10FFFF).');
#endif
heap[outIdx++] = 0xF0 | (u >> 18);
heap[outIdx++] = 0x80 | ((u >> 12) & 63);
heap[outIdx++] = 0x80 | ((u >> 6) & 63);
heap[outIdx++] = 0x80 | (u & 63);
// Gotcha: if codePoint is over 0xFFFF, it is represented as a surrogate pair in UTF-16.
// We need to manually skip over the second code unit for correct iteration.
i++;
}
}
// Null-terminate the pointer to the buffer.
heap[outIdx] = 0;
return outIdx - startIdx;
},
/**
* Copies the given Javascript String object 'str' to the emscripten HEAP at
* address 'outPtr', null-terminated and encoded in UTF8 form. The copy will
* require at most str.length*4+1 bytes of space in the HEAP.
* Use the function lengthBytesUTF8 to compute the exact number of bytes
* (excluding null terminator) that this function will write.
*
* @return {number} The number of bytes written, EXCLUDING the null terminator.
*/
$stringToUTF8__deps: ['$stringToUTF8Array'],
$stringToUTF8: (str, outPtr, maxBytesToWrite) => {
#if ASSERTIONS
assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
#endif
return stringToUTF8Array(str, HEAPU8, outPtr, maxBytesToWrite);
},
/**
* Returns the number of bytes the given Javascript string takes if encoded as a
* UTF8 byte array, EXCLUDING the null terminator byte.
*
* @param {string} str - JavaScript string to operator on
* @return {number} Length, in bytes, of the UTF8 encoded string.
*/
$lengthBytesUTF8: (str) => {
var len = 0;
for (var i = 0; i < str.length; ++i) {
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
// unit, not a Unicode code point of the character! So decode
// UTF16->UTF32->UTF8.
// See http://unicode.org/faq/utf_bom.html#utf16-3
var c = str.charCodeAt(i); // possibly a lead surrogate
if (c <= 0x7F) {
len++;
} else if (c <= 0x7FF) {
len += 2;
} else if (c >= 0xD800 && c <= 0xDFFF) {
len += 4; ++i;
} else {
len += 3;
}
}
return len;
},
$intArrayFromString__docs: '/** @type {function(string, boolean=, number=)} */',
$intArrayFromString__deps: ['$lengthBytesUTF8', '$stringToUTF8Array'],
$intArrayFromString: (stringy, dontAddNull, length) => {
var len = length > 0 ? length : lengthBytesUTF8(stringy)+1;
var u8array = new Array(len);
var numBytesWritten = stringToUTF8Array(stringy, u8array, 0, u8array.length);
if (dontAddNull) u8array.length = numBytesWritten;
return u8array;
},
$intArrayToString: (array) => {
var ret = [];
for (var i = 0; i < array.length; i++) {
var chr = array[i];
if (chr > 0xFF) {
#if ASSERTIONS
assert(false, `Character code ${chr} (${String.fromCharCode(chr)}) at offset ${i} not in 0x00-0xFF.`);
#endif
chr &= 0xFF;
}
ret.push(String.fromCharCode(chr));
}
return ret.join('');
},
// Given a pointer 'ptr' to a null-terminated ASCII-encoded string in the
// emscripten HEAP, returns a copy of that string as a Javascript String
// object.
// Each value read is appended as a single character; reading stops at the
// first zero value, and there is no length limit.
$AsciiToString: (ptr) => {
#if CAN_ADDRESS_2GB
// Reinterpret the pointer as an unsigned 32-bit value.
ptr >>>= 0;
#endif
var str = '';
while (1) {
// Read one 'u8' value at ptr; the post-increment inside the address
// expression advances ptr for the next iteration.
var ch = {{{ makeGetValue('ptr++', 0, 'u8') }}};
// A zero value terminates the string and is not included in the result.
if (!ch) return str;
str += String.fromCharCode(ch);
}
},
// Copies the given Javascript String object 'str' to the emscripten HEAP at
// address 'buffer', null-terminated and encoded in ASCII form. The copy
// writes exactly str.length+1 values to the HEAP: one per UTF-16 code unit of
// 'str', followed by the terminator. There is no size parameter, so the
// caller must make sure that much space is available at 'buffer'.
$stringToAscii: (str, buffer) => {
for (var i = 0; i < str.length; ++i) {
#if ASSERTIONS
// Every code unit must fit in 8 bits.
assert(str.charCodeAt(i) === (str.charCodeAt(i) & 0xff));
#endif
// Store the code unit as an 'i8' value and advance 'buffer' (post-increment
// inside the address expression).
{{{ makeSetValue('buffer++', 0, 'str.charCodeAt(i)', 'i8') }}};
}
// Null-terminate the string
{{{ makeSetValue('buffer', 0, 0, 'i8') }}};
},
// Source expression for the 'utf-16le' decoder object used by UTF16ToString.
// With TEXTDECODER == 2 the decoder is constructed unconditionally; otherwise
// it is constructed only when a global TextDecoder exists and evaluates to
// undefined when it does not.
#if TEXTDECODER == 2
$UTF16Decoder: "new TextDecoder('utf-16le');",
#else
$UTF16Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder('utf-16le') : undefined;",
#endif
// Given a pointer 'ptr' to a null-terminated UTF16LE-encoded string in the
// emscripten HEAP, returns a copy of that string as a Javascript String
// object.
// 'maxBytesToRead' is a byte count; it is halved before being passed to
// findStringEnd(), which works in HEAPU16 element indices. 'ignoreNul' is
// forwarded to findStringEnd() unchanged.
$UTF16ToString__deps: ['$UTF16Decoder', '$findStringEnd'],
$UTF16ToString: (ptr, maxBytesToRead, ignoreNul) => {
#if ASSERTIONS
assert(ptr % 2 == 0, 'Pointer passed to UTF16ToString must be aligned to two bytes!');
#endif
// Convert the byte address into a HEAPU16 element index.
var idx = {{{ getHeapOffset('ptr', 'u16') }}};
var endIdx = findStringEnd(HEAPU16, idx, maxBytesToRead / 2, ignoreNul);
#if TEXTDECODER != 2
// When using conditional TextDecoder, skip it for short strings as the overhead of the native call is not worth it.
// This brace-less `if` guards the `return` that follows the #endif; with
// TEXTDECODER == 2 that `return` is unconditional.
if (endIdx - idx > 16 && UTF16Decoder)
#endif // TEXTDECODER != 2
return UTF16Decoder.decode({{{ getUnsharedTextDecoderView('HEAPU16', 'idx', 'endIdx') }}});
#if TEXTDECODER != 2
// Fallback: decode without UTF16Decoder
var str = '';
// endIdx was already resolved by findStringEnd() above, so this loop only
// copies the code units in [idx, endIdx).
for (var i = idx; i < endIdx; ++i) {
var codeUnit = HEAPU16[i];
// fromCharCode constructs a character from a UTF-16 code unit, so we can
// pass the UTF16 string right through.
str += String.fromCharCode(codeUnit);
}
return str;
#endif // TEXTDECODER != 2
},
// Copies the given Javascript String object 'str' to the emscripten HEAP at
// address 'outPtr', null-terminated and encoded in UTF16 form. The copy will
// require at most str.length*4+2 bytes of space in the HEAP. Use the
// function lengthBytesUTF16() to compute the exact number of bytes (excluding
// null terminator) that this function will write.
// Parameters:
// str: the Javascript string to copy.
// outPtr: Byte address in Emscripten HEAP where to write the string to.
// maxBytesToWrite: The maximum number of bytes this function can write to
// the array. This count should include the null
// terminator, i.e. if maxBytesToWrite=2, only the null
// terminator will be written and nothing else.
// maxBytesToWrite<2 does not write any bytes to the
// output, not even the null terminator.
// Returns the number of bytes written, EXCLUDING the null terminator.
$stringToUTF16: (str, outPtr, maxBytesToWrite) => {
#if ASSERTIONS
assert(outPtr % 2 == 0, 'Pointer passed to stringToUTF16 must be aligned to two bytes!');
#endif
#if ASSERTIONS
assert(typeof maxBytesToWrite == 'number', 'stringToUTF16(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
#endif
// Backwards compatibility: if max bytes is not specified, assume unsafe unbounded write is allowed.
maxBytesToWrite ??= 0x7FFFFFFF;
if (maxBytesToWrite < 2) return 0;
maxBytesToWrite -= 2; // Null terminator.
var startPtr = outPtr;
var numCharsToWrite = (maxBytesToWrite < str.length*2) ? (maxBytesToWrite / 2) : str.length;
for (var i = 0; i < numCharsToWrite; ++i) {
// charCodeAt returns a UTF-16 encoded code unit, so it can be directly written to the HEAP.
var codeUnit = str.charCodeAt(i); // possibly a lead surrogate
{{{ makeSetValue('outPtr', 0, 'codeUnit', 'i16') }}};
outPtr += 2;
}
// Null-terminate the pointer to the HEAP.
{{{ makeSetValue('outPtr', 0, 0, 'i16') }}};
return outPtr - startPtr;
},
// Returns the number of bytes the given Javascript string takes if encoded as
// a UTF16 byte array, EXCLUDING the null terminator byte.
$lengthBytesUTF16: (str) => str.length*2,
$UTF32ToString: (ptr, maxBytesToRead, ignoreNul) => {
#if ASSERTIONS
assert(ptr % 4 == 0, 'Pointer passed to UTF32ToString must be aligned to four bytes!');
#endif
var str = '';
var startIdx = {{{ getHeapOffset('ptr', 'u32') }}};
// If maxBytesToRead is not passed explicitly, it will be undefined, and this
// will always evaluate to true. This saves on code size.
for (var i = 0; !(i >= maxBytesToRead / 4); i++) {
var utf32 = HEAPU32[startIdx + i];
if (!utf32 && !ignoreNul) break;
str += String.fromCodePoint(utf32);
}
return str;
},
// Copies the given Javascript String object 'str' to the emscripten HEAP at
// address 'outPtr', null-terminated and encoded in UTF32 form. The copy will
// require at most str.length*4+4 bytes of space in the HEAP.
// Use the function lengthBytesUTF32() to compute the exact number of bytes
// (excluding null terminator) that this function will write.
// Parameters:
// str: the Javascript string to copy.
// outPtr: Byte address in Emscripten HEAP where to write the string to.
// maxBytesToWrite: The maximum number of bytes this function can write to
// the array. This count should include the null
// terminator, i.e. if maxBytesToWrite=4, only the null
// terminator will be written and nothing else.
// maxBytesToWrite<4 does not write any bytes to the
// output, not even the null terminator.
// Returns the number of bytes written, EXCLUDING the null terminator.
$stringToUTF32: (str, outPtr, maxBytesToWrite) => {
#if CAN_ADDRESS_2GB
outPtr >>>= 0;
#endif
#if ASSERTIONS
assert(outPtr % 4 == 0, 'Pointer passed to stringToUTF32 must be aligned to four bytes!');
#endif
#if ASSERTIONS
assert(typeof maxBytesToWrite == 'number', 'stringToUTF32(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
#endif
// Backwards compatibility: if max bytes is not specified, assume unsafe unbounded write is allowed.
maxBytesToWrite ??= 0x7FFFFFFF;
if (maxBytesToWrite < 4) return 0;
var startPtr = outPtr;
var endPtr = startPtr + maxBytesToWrite - 4;
for (var i = 0; i < str.length; ++i) {
var codePoint = str.codePointAt(i);
// Gotcha: if codePoint is over 0xFFFF, it is represented as a surrogate pair in UTF-16.
// We need to manually skip over the second code unit for correct iteration.
if (codePoint > 0xFFFF) {
i++;
}
{{{ makeSetValue('outPtr', 0, 'codePoint', 'i32') }}};
outPtr += 4;
if (outPtr + 4 > endPtr) break;
}
// Null-terminate the pointer to the HEAP.
{{{ makeSetValue('outPtr', 0, 0, 'i32') }}};
return outPtr - startPtr;
},
// Returns the number of bytes the given Javascript string takes if encoded as
// a UTF16 byte array, EXCLUDING the null terminator byte.
$lengthBytesUTF32: (str) => {
var len = 0;
for (var i = 0; i < str.length; ++i) {
var codePoint = str.codePointAt(i);
// Gotcha: if codePoint is over 0xFFFF, it is represented as a surrogate pair in UTF-16.
// We need to manually skip over the second code unit for correct iteration.
if (codePoint > 0xFFFF) {
i++;
}
len += 4;
}
return len;
},
// Allocate heap space for a JS string, and write it there.
// It is the responsibility of the caller to free() that memory.
$stringToNewUTF8__deps: ['$lengthBytesUTF8', '$stringToUTF8', 'malloc'],
$stringToNewUTF8: (str) => {
var size = lengthBytesUTF8(str) + 1;
var ret = _malloc(size);
if (ret) stringToUTF8(str, ret, size);
return ret;
},
// Allocate stack space for a JS string, and write it there.
$stringToUTF8OnStack__deps: ['$lengthBytesUTF8', '$stringToUTF8', '$stackAlloc'],
$stringToUTF8OnStack: (str) => {
var size = lengthBytesUTF8(str) + 1;
var ret = stackAlloc(size);
stringToUTF8(str, ret, size);
return ret;
},
// Copies the elements of 'array' into HEAP8 starting at offset 'buffer', using
// the typed array set() method. No terminator is appended.
$writeArrayToMemory: (array, buffer) => {
#if ASSERTIONS
// Rejects values whose .length does not compare >= 0 (for example objects
// that have no length property at all).
assert(array.length >= 0, 'writeArrayToMemory array must have a length (should be an array or typed array)')
#endif
HEAP8.set(array, buffer);
},
});