// binary/utf8.js - external/github.com/protocolbuffers/protobuf-javascript - Git at Google

 /**
  * @fileoverview UTF8 encoding and decoding routines
  */
 goog.provide('jspb.binary.utf8');

 goog.require('jspb.asserts');


 /**
  * Whether to use the browser based `TextEncoder` and `TextDecoder` APIs for
  * handling utf8.
  *
  * <p>Enabled by default for `goog.FEATURESET_YEAR >= 2020`.  Independently of
  * this flag, the code feature-detects `TextEncoder`/`TextDecoder` at load time
  * and uses them whenever they exist; this flag exists so that the polyfill
  * does not have to be shipped.
  *
  * <p>Note that the define is registered under the name
  * `jspb.binary.USE_TEXTENCODING` (no underscore between TEXT and ENCODING),
  * which differs from the name of the local constant.
  *
  * <p>See http://go/jscompiler-flags#browser-featureset-year-options for the
  * behavior here.
  *
  * @define {boolean}
  */
 const USE_TEXT_ENCODING =
   goog.define('jspb.binary.USE_TEXTENCODING', goog.FEATURESET_YEAR >= 2020);

 // UTF-16 surrogate code unit ranges.  High (lead) surrogates span
 // 0xD800-0xDBFF and low (trail) surrogates span 0xDC00-0xDFFF; the overall
 // surrogate block is the union of the two.
 const /** number */ MIN_HIGH_SURROGATE = 0xD800;
 const /** number */ MAX_HIGH_SURROGATE = 0xDBFF;
 const /** number */ MIN_LOW_SURROGATE = 0xDC00;
 const /** number */ MAX_LOW_SURROGATE = 0xDFFF;
 const /** number */ MIN_SURROGATE = MIN_HIGH_SURROGATE;
 const /** number */ MAX_SURROGATE = MAX_LOW_SURROGATE;

 /**
  * Tests whether `byte` fails to match the UTF-8 continuation byte pattern
  * '10XXXXXX'.
  * @return {boolean} true when the byte is NOT a continuation byte.
  */
 function isNotTrailingByte(/** number */ byte) {
   // Isolate bits 7 and 6 of the byte; a continuation byte has exactly the
   // binary value '10' (2) there.
   const topTwoBits = (byte >> 6) & 0x3;
   return topTwoBits !== 0x2;
 }


 /**
  * Reports a malformed UTF-8 sequence.
  *
  * In fatal mode this throws; otherwise a single replacement character is
  * appended to `codeUnits` so decoding can continue.
  */
 function invalid(
     /** boolean */ parsingErrorsAreFatal, /** !Array<number> */ codeUnits) {
   if (!parsingErrorsAreFatal) {
     // U+FFFD REPLACEMENT CHARACTER stands in for the bad sequence.
     codeUnits.push(0xFFFD);
     return;
   }
   throw new Error('Invalid UTF8');
 }

 /**
  * Converts `utf16CodeUnits` to a string and appends it to `accum`.
  *
  * A null or undefined `accum` is treated as "nothing accumulated yet", in
  * which case only the newly converted text is returned.
  * @return {string}
  */
 function codeUnitsToString(
     /** string? */ accum, /** !Array<number> */ utf16CodeUnits) {
   const decoded = String.fromCharCode.apply(null, utf16CodeUnits);
   if (accum == null) {
     return decoded;
   }
   return accum + decoded;
 }

 /**
  * Our handwritten UTF8 decoder.
  *
  * Decodes `length` bytes of `bytes`, starting at index `offset`, into a
  * JavaScript (UTF-16) string.  When `parsingErrorsAreFatal` is true the first
  * malformed sequence throws `Error('Invalid UTF8')`; otherwise each malformed
  * sequence contributes one U+FFFD and decoding resumes with the first byte
  * that was not consumed.
  *
  * https://en.wikipedia.org/wiki/UTF-8#Encoding describes the bit layout
  *
  * https://en.wikipedia.org/wiki/UTF-8#Invalid_sequences_and_error_handling
  * describes important cases to check for which are namely:
  * - overlong encodings, meaning a value expressible in N bytes could have been
  * expressed in fewer bytes
  * - invalid bytes, meaning bytes that are generally out of range
  * - surrogate codepoints, utf8 never directly encodes a utf16 surrogate value
  * - underflow where there aren't enough bytes for the sequence we are parsing
  * - out of range codepoints.
  *
  * @return {string}
  */
 jspb.binary.utf8.polyfillDecodeUtf8 = function (
     /** !Uint8Array */ bytes, /** number */ offset, /** number */ length,
     /** boolean */ parsingErrorsAreFatal) {
   let cursor = offset;
   const end = cursor + length;
   // Pending UTF-16 code units that have not yet been flushed into `result`.
   const codeUnits = [];
   // Text flushed so far; null until the first flush happens.
   let result = null;

   // This is significantly slower than the TextDecoder implementation.
   // Ideas for improving performance:
   // 1. Reduce branching with non-short-circuiting operators, e.g.
   // https://stackoverflow.com/q/5652363
   // 2. improve isNotTrailingByte using xor?
   // 3. consider having a dedicated ascii loop (java impls do this)
   let c1, c2, c3, c4;
   while (cursor < end) {
     c1 = bytes[cursor++];
     if (c1 < 0x80) {  // Regular 7-bit ASCII.
       codeUnits.push(c1);
     } else if (c1 < 0xE0) {  // UTF-8 with two bytes.
       // Note: this branch also receives 0x80..0xC1 (stray continuation bytes
       // and overlong leads); they are rejected by the `c1 < 0xC2` test below.
       if (cursor >= end) {
         // Truncated: there is no byte left to serve as c2.
         invalid(parsingErrorsAreFatal, codeUnits);
       } else {
         c2 = bytes[cursor++];
         // Make sure that c1 is a valid leading byte and c2 is a valid
         // trailing byte
         // 0xC2 is '11000010', if c1 is less than this then we have an overlong
         // encoding because there would only be 7 significant bits.
         if (c1 < 0xC2 || isNotTrailingByte(c2)) {
           cursor--;  // push c2 back since it isn't 'accepted'
           invalid(parsingErrorsAreFatal, codeUnits);
         } else {
           // The codeUnit is the lower 6 bits from c2 and the lower 5 bits from
           // c1
           const codeUnit = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
           // Consistency check that the computed code is in range for a 2 byte
           // sequence.
           jspb.asserts.assert(codeUnit >= 0x80 && codeUnit <= 0x07FF);
           codeUnits.push(codeUnit);
         }
       }
     } else if (c1 < 0xF0) {  // UTF-8 with three bytes.
       if (cursor >= end - 1) {
         // Truncated: fewer than two bytes remain after c1.
         invalid(parsingErrorsAreFatal, codeUnits);
       } else {
         c2 = bytes[cursor++];
         if (isNotTrailingByte(c2) ||
           // These checks were taken from
           // java/com/google/protobuf/Utf8.java
           // overlong? 5 most significant bits must not all be zero
           (c1 === 0xE0 && c2 < 0xA0)
           // check for illegal surrogate codepoints
           || (c1 === 0xED && c2 >= 0xA0) ||
           // We delay reading c3 until now so that an error in c2 or c1 will
           // preserve c3 for the next loop iteration
           isNotTrailingByte(c3 = bytes[cursor++])) {
           cursor--;  // push back c2 or c3, depending on how far we made it
           invalid(parsingErrorsAreFatal, codeUnits);
         } else {
           // 4 bits from the first byte
           // 6 bits from each of the two lower bytes
           // == 16 bits total
           const codeUnit =
             ((c1 & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
           // Consistency check, this is the valid range for a 3 byte character
           jspb.asserts.assert(codeUnit >= 0x800 && codeUnit <= 0xFFFF);
           // And that Utf16 surrogates are disallowed
           jspb.asserts.assert(codeUnit < MIN_SURROGATE || codeUnit > MAX_SURROGATE);
           codeUnits.push(codeUnit);
         }
       }
     } else if (c1 <= 0xF4) {  // UTF-8 with 4 bytes.
       // Lead bytes 0xF0..0xF7 carry the 4-byte bit pattern '11110XXX', but a
       // lead byte above 0xF4 would encode a code point beyond U+10FFFF, so
       // only 0xF0..0xF4 are accepted here.
       if (cursor >= end - 2) {
         // Truncated: fewer than three bytes remain after c1.
         invalid(parsingErrorsAreFatal, codeUnits);
       } else {
         c2 = bytes[cursor++];
         if (isNotTrailingByte(c2) ||
           // This check was inspired by
           // java/com/google/protobuf/Utf8.java
           // Tricky optimized form of:
           //   valid 4-byte leading byte?
           // if (byte1 > (byte) 0xF4 ||
           //   overlong? 4 most significant bits must not all be zero
           //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
           //   codepoint larger than the highest code point (U+10FFFF)?
           //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
           (((c1 << 28) + (c2 - 0x90)) >> 30) !== 0 ||
           // We delay reading c3 and c4 until now so that an error in c2 or c1
           // will preserve them for the next loop iteration.
           isNotTrailingByte(c3 = bytes[cursor++]) ||
           isNotTrailingByte(c4 = bytes[cursor++])) {
           cursor--;  // push back c2, c3 or c4 depending on how far we made it
           invalid(parsingErrorsAreFatal, codeUnits);
         } else {
           // Characters written on 4 bytes have 21 bits for a codepoint.
           // We can't fit that on 16bit characters, so we use surrogates.
           // 3 bits from the uppermost byte, 6 bits from each of the lower 3
           // bytes. This is 21 bits which is too big for a 16 bit utf16 code
           // unit so we use surrogates.
           let codepoint = ((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) |
             ((c3 & 0x3F) << 6) | (c4 & 0x3F);
           // Consistency check, this is the valid range for a 4 byte character.
           jspb.asserts.assert(codepoint >= 0x10000 && codepoint <= 0x10FFFF);
           // Surrogates formula from wikipedia.
           // 1. Subtract 0x10000 from codepoint
           codepoint -= 0x10000;
           // 2. Split this into the high 10-bit value and the low 10-bit value
           // 3. Add 0xD800 to the high value to form the high surrogate
           // 4. Add 0xDC00 to the low value to form the low surrogate:
           const low = (codepoint & 0x3FF) + MIN_LOW_SURROGATE;
           const high = ((codepoint >> 10) & 0x3FF) + MIN_HIGH_SURROGATE;
           codeUnits.push(high, low);
         }
       }
     } else {
       // initial byte is too large for utf8
       invalid(parsingErrorsAreFatal, codeUnits);
     }
     // Accumulate as we go to avoid exceeding the maximum stack size when
     // calling `apply`.
     if (codeUnits.length >= 8192) {
       result = codeUnitsToString(result, codeUnits);
       codeUnits.length = 0;
     }
   }
   // ensure we don't overflow or underflow
   jspb.asserts.assert(cursor === end, `expected ${cursor} === ${end}`);
   // Flush whatever is still pending and return the complete string.
   return codeUnitsToString(result, codeUnits);
 }


 /**
  * Memoized answer to "may a fatal TextDecoder be reused after it threw?".
  * `undefined` means the probe below has not run yet.
  * @type {boolean|undefined}
  */
 let isFatalTextDecoderCachableAfterThrowing_ =
   // chrome version >= 2020 are not subject to https://crbug.com/910292
   goog.FEATURESET_YEAR >= 2020 ? true : undefined;

 /**
  * Determines, once, whether `decoder` is affected by
  * https://crbug.com/910292, where one failed decode makes every later decode
  * on the same instance throw.  The result is cached for subsequent calls.
  * @return {boolean} true when the decoder can keep being reused.
  */
 function isFatalTextDecoderCachableAfterThrowing(/** !TextDecoder */ decoder) {
   if (isFatalTextDecoderCachableAfterThrowing_ !== undefined) {
     return isFatalTextDecoderCachableAfterThrowing_;
   }
   // In theory no extra failure is needed since this function only runs after
   // a failed decode.  However the buggy chrome versions do not corrupt their
   // state consistently (it depends on where in the stream the error occurs),
   // whereas a lonely continuation byte triggered the bug reliably in manual
   // testing.
   try {
     decoder.decode(new Uint8Array([0x80]));
   } catch (e) {
     // expected
   }
   let /** boolean */ stillUsable;
   try {
     // 'a' in hex; a healthy decoder must accept this.
     decoder.decode(new Uint8Array([0x61]));
     stillUsable = true;
   } catch (e) {
     // Valid input threw, so this chrome version is buggy and the cached
     // decoder has to be flushed whenever a failure occurs.
     stillUsable = false;
   }
   isFatalTextDecoderCachableAfterThrowing_ = stillUsable;
   return stillUsable;
 }

 /**
  * Lazily created decoder that throws on malformed input.
  * @type {!TextDecoder|undefined}
  */
 let fatalDecoderInstance;

 /**
  * Returns the shared fatal `TextDecoder`, creating it on first use (or after
  * the cached instance has been cleared).
  * @return {!TextDecoder}
  */
 function getFatalDecoderInstance() {
   const cached = fatalDecoderInstance;
   if (cached) {
     return cached;
   }
   const created = new TextDecoder('utf-8', { fatal: true });
   fatalDecoderInstance = created;
   return created;
 }

 /**
  * Lazily created decoder that does not throw on malformed input.
  * @type {!TextDecoder|undefined}
  */
 let nonFatalDecoderInstance;

 /**
  * Returns the shared non-fatal `TextDecoder`, creating it on first use.
  * @return {!TextDecoder}
  */
 function getNonFatalDecoderInstance() {
   const cached = nonFatalDecoderInstance;
   if (cached) {
     return cached;
   }
   const created = new TextDecoder('utf-8', { fatal: false });
   nonFatalDecoderInstance = created;
   return created;
 }

 /**
  * Returns a view of `bytes` covering `[offset, end)`.
  *
  * `subarray` tends to be surprisingly slow, so when the requested range is
  * the entire array the input itself is returned and no new view is created.
  * @return {!Uint8Array}
  */
 function subarray(
     /** !Uint8Array*/ bytes, /** number */ offset, /** number */ end) {
   const coversEverything = offset === 0 && end === bytes.length;
   if (coversEverything) {
     return bytes;
   }
   return bytes.subarray(offset, end);
 }

 /**
  * Decodes `length` bytes of `bytes`, starting at `offset`, with the platform
  * `TextDecoder`.
  *
  * Any error raised by the decoder is rethrown unchanged.  In fatal mode the
  * cached decoder is discarded first if this browser cannot safely reuse a
  * decoder that has thrown.
  * @return {string}
  */
 jspb.binary.utf8.textDecoderDecodeUtf8 = function (
     /** !Uint8Array*/ bytes, /** number */ offset, /** number */ length,
     /** boolean*/ parsingErrorsAreFatal) {
   let /** !TextDecoder */ decoder;
   if (parsingErrorsAreFatal) {
     decoder = getFatalDecoderInstance();
   } else {
     decoder = getNonFatalDecoderInstance();
   }

   const window = subarray(bytes, offset, offset + length);
   try {
     return decoder.decode(window);
   } catch (e) {
     const mustDropCachedDecoder = parsingErrorsAreFatal &&
       !isFatalTextDecoderCachableAfterThrowing(decoder);
     if (mustDropCachedDecoder) {
       fatalDecoderInstance = undefined;
     }
     throw e;
   }
 };

 /**
  * True when decoding goes through `TextDecoder`: either forced by the
  * compile-time flag or because the API was detected at load time.
  * @const {boolean}
  */
 const useTextDecoderDecode =
   USE_TEXT_ENCODING || typeof TextDecoder !== 'undefined';

 /**
  * Decodes UTF-8 bytes to a string, delegating to the `TextDecoder` based
  * implementation when it is usable and to the polyfill otherwise.
  * @return {string}
  */
 jspb.binary.utf8.decodeUtf8 = function (
     /** !Uint8Array*/ bytes, /** number */ offset, /** number */ length,
     /** boolean*/ parsingErrorsAreFatal) {
   if (useTextDecoderDecode) {
     return jspb.binary.utf8.textDecoderDecodeUtf8(
       bytes, offset, length, parsingErrorsAreFatal);
   }
   return jspb.binary.utf8.polyfillDecodeUtf8(
     bytes, offset, length, parsingErrorsAreFatal);
 };

 /**
  * Lazily created `TextEncoder` shared by every call to `textEncoderEncode`.
  * @type {!TextEncoder|undefined}
  */
 let textEncoderInstance;

 /**
  * Encodes `s` as UTF-8 using the platform `TextEncoder`.
  *
  * When `rejectUnpairedSurrogates` is truthy the string is first validated
  * with `jspb.binary.utf8.checkWellFormed`, which throws on a lone surrogate.
  * @return {!Uint8Array}
  */
 jspb.binary.utf8.textEncoderEncode = function (
     /** string */ s, /** boolean */ rejectUnpairedSurrogates) {
   if (rejectUnpairedSurrogates) {
     jspb.binary.utf8.checkWellFormed(s);
   }

   const encoder =
     textEncoderInstance || (textEncoderInstance = new TextEncoder());
   return encoder.encode(s);
 };

 // `String.prototype.isWellFormed` landed in major browsers in early 2023, so
 // it can only be assumed present from FEATURESET_YEAR 2024 onwards; for older
 // targets it is feature-detected at load time.  See
 // http://go/mdn/JavaScript/Reference/Global_Objects/String/isWellFormed
 const /** boolean */ HAS_WELL_FORMED_METHOD = goog.FEATURESET_YEAR > 2023 ||
   typeof String.prototype.isWellFormed === 'function';

 /**
  * Throws `Error('Found an unpaired surrogate')` when `text` contains a UTF-16
  * surrogate that is not part of a valid high/low pair; otherwise returns
  * normally.
  */
 jspb.binary.utf8.checkWellFormed = function (/** string */ text) {
   let /** boolean */ hasLoneSurrogate;
   if (HAS_WELL_FORMED_METHOD) {
     // Externs don't contain the definition of this function yet, hence the
     // cast through the unknown type.
     // http://go/mdn/JavaScript/Reference/Global_Objects/String/isWellFormed
     const wellFormedCapable =
       /** @type {{isWellFormed:function():boolean}} */ (
         /** @type {?} */ (text));
     hasLoneSurrogate = !wellFormedCapable.isWellFormed();
   } else {
     // Matches a low surrogate that is not preceded by a high surrogate, or a
     // high surrogate that is not followed by a low surrogate.
     hasLoneSurrogate =
       /(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/
         .test(text);
   }
   if (hasLoneSurrogate) {
     throw new Error('Found an unpaired surrogate');
   }
 };


 /**
  * Hand-written UTF-8 encoder used when `TextEncoder` is unavailable.
  *
  * A valid surrogate pair is combined into one 4-byte sequence.  A surrogate
  * without its partner either throws (when `rejectUnpairedSurrogates` is
  * truthy) or is encoded as U+FFFD.
  *
  * @return {!Uint8Array}
  */
 jspb.binary.utf8.polyfillEncode = function (
     /** string */ s, /** boolean */ rejectUnpairedSurrogates) {
   // The worst case is 3 output bytes per UTF-16 code unit (a surrogate pair
   // is 2 units and yields 4 bytes), so allocate for that.  This assumes the
   // buffer will be short lived; callers can always `slice` if needed.
   const out = new Uint8Array(3 * s.length);
   let written = 0;
   for (let ci = 0; ci < s.length; ci++) {
     const unit = s.charCodeAt(ci);
     if (unit < 0x80) {
       // 1 byte: plain ASCII.
       out[written++] = unit;
       continue;
     }
     if (unit < 0x800) {
       // 2 bytes: 5 payload bits in the lead byte, 6 in the trailing byte.
       out[written++] = (unit >> 6) | 0xC0;
       out[written++] = (unit & 63) | 0x80;
       continue;
     }
     jspb.asserts.assert(unit < 65536);
     // Value that will be written as a 3-byte sequence at the bottom of the
     // loop, unless a surrogate pair is emitted as 4 bytes first.
     let threeByteValue = unit;
     const isSurrogate = unit >= MIN_SURROGATE && unit <= MAX_SURROGATE;
     if (isSurrogate) {
       // Only a high surrogate can start a pair.
       if (unit <= MAX_HIGH_SURROGATE && ci < s.length) {
         // Peek at the following unit.  Past the end of the string
         // `charCodeAt` yields NaN, which fails the range test below.
         const next = s.charCodeAt(ci + 1);
         if (next >= MIN_LOW_SURROGATE && next <= MAX_LOW_SURROGATE) {
           ci++;  // the low surrogate is consumed together with the high one
           // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
           const codePoint =
             (unit - MIN_SURROGATE) * 0x400 + next - MIN_LOW_SURROGATE + 0x10000;
           out[written++] = (codePoint >> 18) | 0xF0;
           out[written++] = ((codePoint >> 12) & 63) | 0x80;
           out[written++] = ((codePoint >> 6) & 63) | 0x80;
           out[written++] = (codePoint & 63) | 0x80;
           continue;
         }
         // Otherwise `next` is not a low surrogate: `unit` is a lone
         // surrogate and `next` is handled on the following iteration.
       }
       if (rejectUnpairedSurrogates) {
         throw new Error('Found an unpaired surrogate');
       }
       threeByteValue = 0xFFFD;  // Error! Unpaired surrogate
     }
     out[written++] = (threeByteValue >> 12) | 0xE0;
     out[written++] = ((threeByteValue >> 6) & 63) | 0x80;
     out[written++] = (threeByteValue & 63) | 0x80;
   }
   return subarray(out, 0, written);
 };

 /**
  * True when encoding goes through `TextEncoder`: either forced by the
  * compile-time flag or because the API was detected at load time.
  * @const {boolean}
  */
 const useTextEncoderEncode =
   USE_TEXT_ENCODING || typeof TextEncoder !== 'undefined';

 /**
  * Encodes a string as UTF-8, delegating to the `TextEncoder` based
  * implementation when it is usable and to the polyfill otherwise.
  *
  * `rejectUnpairedSurrogates` defaults to false and is forwarded unchanged to
  * the selected implementation.
  * @return {!Uint8Array}
  */
 jspb.binary.utf8.encodeUtf8 = function (
     /**string*/ string, /** boolean=*/ rejectUnpairedSurrogates = false) {
   jspb.asserts.assertString(string);
   if (useTextEncoderEncode) {
     return jspb.binary.utf8.textEncoderEncode(
       string, rejectUnpairedSurrogates);
   }
   return jspb.binary.utf8.polyfillEncode(string, rejectUnpairedSurrogates);
 };
	/**
	* @fileoverview UTF8 encoding and decoding routines
	*/
	goog.provide('jspb.binary.utf8');

	goog.require('jspb.asserts');


	/**
	* Whether to use the browser based `TextEncoder` and `TextDecoder` APIs for
	* handling utf8.
	*
	* <p>Enabled by default for `goog.FEATURESET_YEAR >= 2020`. Independently of
	* this flag, the code feature-detects `TextEncoder`/`TextDecoder` at load time
	* and uses them whenever they exist; this flag exists so that the polyfill
	* does not have to be shipped.
	*
	* <p>Note that the define is registered under the name
	* `jspb.binary.USE_TEXTENCODING` (no underscore between TEXT and ENCODING),
	* which differs from the name of the local constant.
	*
	* <p>See http://go/jscompiler-flags#browser-featureset-year-options for the
	* behavior here.
	*
	* @define {boolean}
	*/
	const USE_TEXT_ENCODING =
	goog.define('jspb.binary.USE_TEXTENCODING', goog.FEATURESET_YEAR >= 2020);

	// UTF-16 surrogate code unit ranges. High (lead) surrogates span
	// 0xD800-0xDBFF and low (trail) surrogates span 0xDC00-0xDFFF; the overall
	// surrogate block is the union of the two.
	const /** number */ MIN_HIGH_SURROGATE = 0xD800;
	const /** number */ MAX_HIGH_SURROGATE = 0xDBFF;
	const /** number */ MIN_LOW_SURROGATE = 0xDC00;
	const /** number */ MAX_LOW_SURROGATE = 0xDFFF;
	const /** number */ MIN_SURROGATE = MIN_HIGH_SURROGATE;
	const /** number */ MAX_SURROGATE = MAX_LOW_SURROGATE;

	/**
	 * Tests whether `byte` fails to match the UTF-8 continuation byte pattern
	 * '10XXXXXX'.
	 * @return {boolean} true when the byte is NOT a continuation byte.
	 */
	function isNotTrailingByte(/** number */ byte) {
	  // Isolate bits 7 and 6 of the byte; a continuation byte has exactly the
	  // binary value '10' (2) there.
	  const topTwoBits = (byte >> 6) & 0x3;
	  return topTwoBits !== 0x2;
	}


	/**
	 * Reports a malformed UTF-8 sequence.
	 *
	 * In fatal mode this throws `Error('Invalid UTF8')`; otherwise a single
	 * U+FFFD replacement character is appended to `codeUnits` so decoding can
	 * continue.
	 */
	function invalid(
	    /** boolean */ parsingErrorsAreFatal, /** !Array<number> */ codeUnits) {
	  if (parsingErrorsAreFatal) {
	    throw new Error('Invalid UTF8');
	  }
	  codeUnits.push(0xFFFD);  // utf8 replacement character
	}

	/**
	 * Converts `utf16CodeUnits` to a string and appends it to `accum`.
	 *
	 * A null or undefined `accum` is treated as "nothing accumulated yet", in
	 * which case only the newly converted text is returned.
	 * @return {string}
	 */
	function codeUnitsToString(
	    /** string? */ accum, /** !Array<number> */ utf16CodeUnits) {
	  const suffix = String.fromCharCode.apply(null, utf16CodeUnits);
	  return accum == null ? suffix : accum + suffix;
	}

	/**
	 * Our handwritten UTF8 decoder.
	 *
	 * Decodes `length` bytes of `bytes`, starting at index `offset`, into a
	 * JavaScript (UTF-16) string. When `parsingErrorsAreFatal` is true the first
	 * malformed sequence throws `Error('Invalid UTF8')`; otherwise each malformed
	 * sequence contributes one U+FFFD and decoding resumes with the first byte
	 * that was not consumed.
	 *
	 * https://en.wikipedia.org/wiki/UTF-8#Encoding describes the bit layout
	 *
	 * https://en.wikipedia.org/wiki/UTF-8#Invalid_sequences_and_error_handling
	 * describes important cases to check for which are namely:
	 * - overlong encodings, meaning a value expressible in N bytes could have been
	 *   expressed in fewer bytes
	 * - invalid bytes, meaning bytes that are generally out of range
	 * - surrogate codepoints, utf8 never directly encodes a utf16 surrogate value
	 * - underflow where there aren't enough bytes for the sequence we are parsing
	 * - out of range codepoints.
	 *
	 * @return {string}
	 */
	jspb.binary.utf8.polyfillDecodeUtf8 = function (
	    /** !Uint8Array */ bytes, /** number */ offset, /** number */ length,
	    /** boolean */ parsingErrorsAreFatal) {
	  let cursor = offset;
	  const end = cursor + length;
	  // Pending UTF-16 code units that have not yet been flushed into `result`.
	  const codeUnits = [];
	  // Text flushed so far; null until the first flush happens.
	  let result = null;

	  // This is significantly slower than the TextDecoder implementation.
	  // Ideas for improving performance:
	  // 1. Reduce branching with non-short-circuiting operators, e.g.
	  // https://stackoverflow.com/q/5652363
	  // 2. improve isNotTrailingByte using xor?
	  // 3. consider having a dedicated ascii loop (java impls do this)
	  let c1, c2, c3, c4;
	  while (cursor < end) {
	    c1 = bytes[cursor++];
	    if (c1 < 0x80) {  // Regular 7-bit ASCII.
	      codeUnits.push(c1);
	    } else if (c1 < 0xE0) {  // UTF-8 with two bytes.
	      // This branch also receives 0x80..0xC1 (stray continuation bytes and
	      // overlong leads); they are rejected by the `c1 < 0xC2` test below.
	      if (cursor >= end) {
	        // Truncated: there is no byte left to serve as c2.
	        invalid(parsingErrorsAreFatal, codeUnits);
	      } else {
	        c2 = bytes[cursor++];
	        // Make sure that c1 is a valid leading byte and c2 is a valid
	        // trailing byte.
	        // 0xC2 is '11000010', if c1 is less than this then we have an
	        // overlong encoding because there would only be 7 significant bits.
	        if (c1 < 0xC2 || isNotTrailingByte(c2)) {
	          cursor--;  // push c2 back since it isn't 'accepted'
	          invalid(parsingErrorsAreFatal, codeUnits);
	        } else {
	          // The codeUnit is the lower 6 bits from c2 and the lower 5 bits
	          // from c1.
	          const codeUnit = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
	          // Consistency check that the computed code is in range for a 2
	          // byte sequence.
	          jspb.asserts.assert(codeUnit >= 0x80 && codeUnit <= 0x07FF);
	          codeUnits.push(codeUnit);
	        }
	      }
	    } else if (c1 < 0xF0) {  // UTF-8 with three bytes.
	      if (cursor >= end - 1) {
	        // Truncated: fewer than two bytes remain after c1.
	        invalid(parsingErrorsAreFatal, codeUnits);
	      } else {
	        c2 = bytes[cursor++];
	        if (isNotTrailingByte(c2) ||
	            // These checks were taken from
	            // java/com/google/protobuf/Utf8.java
	            // overlong? 5 most significant bits must not all be zero
	            (c1 === 0xE0 && c2 < 0xA0) ||
	            // check for illegal surrogate codepoints
	            (c1 === 0xED && c2 >= 0xA0) ||
	            // We delay reading c3 until now so that an error in c2 or c1
	            // will preserve c3 for the next loop iteration.
	            isNotTrailingByte(c3 = bytes[cursor++])) {
	          cursor--;  // push back c2 or c3, depending on how far we made it
	          invalid(parsingErrorsAreFatal, codeUnits);
	        } else {
	          // 4 bits from the first byte
	          // 6 bits from each of the two lower bytes
	          // == 16 bits total
	          const codeUnit =
	            ((c1 & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
	          // Consistency check, this is the valid range for a 3 byte
	          // character.
	          jspb.asserts.assert(codeUnit >= 0x800 && codeUnit <= 0xFFFF);
	          // And that Utf16 surrogates are disallowed.
	          jspb.asserts.assert(
	            codeUnit < MIN_SURROGATE || codeUnit > MAX_SURROGATE);
	          codeUnits.push(codeUnit);
	        }
	      }
	    } else if (c1 <= 0xF4) {  // UTF-8 with 4 bytes.
	      // Lead bytes 0xF0..0xF7 carry the 4-byte bit pattern '11110XXX', but a
	      // lead byte above 0xF4 would encode a code point beyond U+10FFFF, so
	      // only 0xF0..0xF4 are accepted here.
	      if (cursor >= end - 2) {
	        // Truncated: fewer than three bytes remain after c1.
	        invalid(parsingErrorsAreFatal, codeUnits);
	      } else {
	        c2 = bytes[cursor++];
	        if (isNotTrailingByte(c2) ||
	            // This check was inspired by
	            // java/com/google/protobuf/Utf8.java
	            // Tricky optimized form of:
	            //   valid 4-byte leading byte?
	            // if (byte1 > (byte) 0xF4 ||
	            //   overlong? 4 most significant bits must not all be zero
	            //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
	            //   codepoint larger than the highest code point (U+10FFFF)?
	            //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
	            (((c1 << 28) + (c2 - 0x90)) >> 30) !== 0 ||
	            // We delay reading c3 and c4 until now so that an error in c2 or
	            // c1 will preserve them for the next loop iteration.
	            isNotTrailingByte(c3 = bytes[cursor++]) ||
	            isNotTrailingByte(c4 = bytes[cursor++])) {
	          cursor--;  // push back c2, c3 or c4 depending on how far we made it
	          invalid(parsingErrorsAreFatal, codeUnits);
	        } else {
	          // 3 bits from the uppermost byte, 6 bits from each of the lower 3
	          // bytes. This is 21 bits which is too big for a 16 bit utf16 code
	          // unit so we use surrogates.
	          let codepoint = ((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) |
	            ((c3 & 0x3F) << 6) | (c4 & 0x3F);
	          // Consistency check, this is the valid range for a 4 byte
	          // character.
	          jspb.asserts.assert(codepoint >= 0x10000 && codepoint <= 0x10FFFF);
	          // Surrogates formula from wikipedia.
	          // 1. Subtract 0x10000 from codepoint
	          codepoint -= 0x10000;
	          // 2. Split this into the high 10-bit value and the low 10-bit value
	          // 3. Add 0xD800 to the high value to form the high surrogate
	          // 4. Add 0xDC00 to the low value to form the low surrogate:
	          const low = (codepoint & 0x3FF) + MIN_LOW_SURROGATE;
	          const high = ((codepoint >> 10) & 0x3FF) + MIN_HIGH_SURROGATE;
	          codeUnits.push(high, low);
	        }
	      }
	    } else {
	      // initial byte is too large for utf8
	      invalid(parsingErrorsAreFatal, codeUnits);
	    }
	    // Accumulate as we go to avoid exceeding the maximum stack size when
	    // calling `apply`.
	    if (codeUnits.length >= 8192) {
	      result = codeUnitsToString(result, codeUnits);
	      codeUnits.length = 0;
	    }
	  }
	  // ensure we don't overflow or underflow
	  jspb.asserts.assert(cursor === end, `expected ${cursor} === ${end}`);
	  // Flush whatever is still pending and return the complete string.
	  return codeUnitsToString(result, codeUnits);
	};


	/**
	 * Memoized answer to "may a fatal TextDecoder be reused after it threw?".
	 * `undefined` means the probe below has not run yet.
	 * @type {boolean|undefined}
	 */
	let isFatalTextDecoderCachableAfterThrowing_ =
	  // chrome version >= 2020 are not subject to https://crbug.com/910292
	  goog.FEATURESET_YEAR >= 2020 ? true : undefined;

	/**
	 * Determines, once, whether `decoder` is affected by
	 * https://crbug.com/910292, where one failed decode makes every later decode
	 * on the same instance throw. The result is cached for subsequent calls.
	 * @return {boolean} true when the decoder can keep being reused.
	 */
	function isFatalTextDecoderCachableAfterThrowing(/** !TextDecoder */ decoder) {
	  if (isFatalTextDecoderCachableAfterThrowing_ !== undefined) {
	    return isFatalTextDecoderCachableAfterThrowing_;
	  }
	  // In theory no extra failure is needed since this function only runs after
	  // a failed decode. However the buggy chrome versions do not corrupt their
	  // state consistently (it depends on where in the stream the error occurs),
	  // whereas a lonely continuation byte triggered the bug reliably in manual
	  // testing.
	  try {
	    decoder.decode(new Uint8Array([0x80]));
	  } catch (e) {
	    // expected
	  }
	  let /** boolean */ stillUsable;
	  try {
	    // 'a' in hex; a healthy decoder must accept this.
	    decoder.decode(new Uint8Array([0x61]));
	    stillUsable = true;
	  } catch (e) {
	    // Valid input threw, so this chrome version is buggy and the cached
	    // decoder has to be flushed whenever a failure occurs.
	    stillUsable = false;
	  }
	  isFatalTextDecoderCachableAfterThrowing_ = stillUsable;
	  return stillUsable;
	}

	/**
	 * Lazily created decoder that throws on malformed input.
	 * @type {!TextDecoder|undefined}
	 */
	let fatalDecoderInstance;

	/**
	 * Returns the shared fatal `TextDecoder`, creating it on first use (or after
	 * the cached instance has been cleared).
	 * @return {!TextDecoder}
	 */
	function getFatalDecoderInstance() {
	  const cached = fatalDecoderInstance;
	  if (cached) {
	    return cached;
	  }
	  const created = new TextDecoder('utf-8', { fatal: true });
	  fatalDecoderInstance = created;
	  return created;
	}

	/**
	 * Lazily created decoder that does not throw on malformed input.
	 * @type {!TextDecoder|undefined}
	 */
	let nonFatalDecoderInstance;

	/**
	 * Returns the shared non-fatal `TextDecoder`, creating it on first use.
	 * @return {!TextDecoder}
	 */
	function getNonFatalDecoderInstance() {
	  const cached = nonFatalDecoderInstance;
	  if (cached) {
	    return cached;
	  }
	  const created = new TextDecoder('utf-8', { fatal: false });
	  nonFatalDecoderInstance = created;
	  return created;
	}

	/**
	 * Returns a view of `bytes` covering `[offset, end)`.
	 *
	 * `subarray` tends to be surprisingly slow, so when the requested range is
	 * the entire array the input itself is returned and no new view is created.
	 * @return {!Uint8Array}
	 */
	function subarray(
	    /** !Uint8Array */ bytes, /** number */ offset, /** number */ end) {
	  return offset === 0 && end === bytes.length ? bytes :
	    bytes.subarray(offset, end);
	}

	/**
	 * Decodes `length` bytes of `bytes`, starting at `offset`, with the platform
	 * `TextDecoder`.
	 *
	 * Any error raised by the decoder is rethrown unchanged. In fatal mode the
	 * cached decoder is discarded first if this browser cannot safely reuse a
	 * decoder that has thrown.
	 * @return {string}
	 */
	jspb.binary.utf8.textDecoderDecodeUtf8 = function (
	    /** !Uint8Array */ bytes, /** number */ offset, /** number */ length,
	    /** boolean */ parsingErrorsAreFatal) {
	  const /** !TextDecoder */ decoder = parsingErrorsAreFatal ?
	    getFatalDecoderInstance() :
	    getNonFatalDecoderInstance();

	  bytes = subarray(bytes, offset, offset + length);
	  try {
	    return decoder.decode(bytes);
	  } catch (e) {
	    if (parsingErrorsAreFatal &&
	        !isFatalTextDecoderCachableAfterThrowing(decoder)) {
	      // Drop the poisoned decoder so the next call builds a fresh one.
	      fatalDecoderInstance = undefined;
	    }
	    throw e;
	  }
	};

	/**
	 * True when decoding goes through `TextDecoder`: either forced by the
	 * compile-time flag or because the API was detected at load time.
	 * @const {boolean}
	 */
	const useTextDecoderDecode =
	  USE_TEXT_ENCODING || typeof TextDecoder !== 'undefined';

	/**
	 * A utf8 decoding routine either based upon TextDecoder if available or using
	 * our polyfill implementation.
	 * @return {string}
	 */
	jspb.binary.utf8.decodeUtf8 = function (
	    /** !Uint8Array */ bytes, /** number */ offset, /** number */ length,
	    /** boolean */ parsingErrorsAreFatal) {
	  return useTextDecoderDecode ?
	    jspb.binary.utf8.textDecoderDecodeUtf8(
	      bytes, offset, length, parsingErrorsAreFatal) :
	    jspb.binary.utf8.polyfillDecodeUtf8(
	      bytes, offset, length, parsingErrorsAreFatal);
	};

	/**
	 * Lazily created `TextEncoder` shared by every call to `textEncoderEncode`.
	 * @type {!TextEncoder|undefined}
	 */
	let textEncoderInstance;

	/**
	 * Encodes `s` as UTF-8 using the platform `TextEncoder`.
	 *
	 * When `rejectUnpairedSurrogates` is truthy the string is first validated
	 * with `jspb.binary.utf8.checkWellFormed`, which throws on a lone surrogate.
	 * @return {!Uint8Array}
	 */
	jspb.binary.utf8.textEncoderEncode = function (
	    /** string */ s, /** boolean */ rejectUnpairedSurrogates) {
	  if (rejectUnpairedSurrogates) {
	    jspb.binary.utf8.checkWellFormed(s);
	  }

	  if (!textEncoderInstance) {
	    textEncoderInstance = new TextEncoder();
	  }
	  return textEncoderInstance.encode(s);
	};

	// `String.prototype.isWellFormed` landed in major browsers in early 2023, so
	// it can only be assumed present from FEATURESET_YEAR 2024 onwards; for older
	// targets it is feature-detected at load time. See
	// http://go/mdn/JavaScript/Reference/Global_Objects/String/isWellFormed
	const /** boolean */ HAS_WELL_FORMED_METHOD = goog.FEATURESET_YEAR > 2023 ||
	  typeof String.prototype.isWellFormed === 'function';

	/**
	 * Throws `Error('Found an unpaired surrogate')` when `text` contains a UTF-16
	 * surrogate that is not part of a valid high/low pair; otherwise returns
	 * normally.
	 */
	jspb.binary.utf8.checkWellFormed = function (/** string */ text) {
	  if (HAS_WELL_FORMED_METHOD ?
	      // Externs don't contain the definition of this function yet.
	      // http://go/mdn/JavaScript/Reference/Global_Objects/String/isWellFormed
	      !(/** @type {{isWellFormed:function():boolean}} */ (
	          /** @type {?} */ (text))
	        .isWellFormed()) :
	      // Matches a low surrogate that is not preceded by a high surrogate, or
	      // a high surrogate that is not followed by a low surrogate.
	      /(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/
	        .test(text)) {
	    throw new Error('Found an unpaired surrogate');
	  }
	};


	/**
	 * Hand-written UTF-8 encoder used when `TextEncoder` is unavailable.
	 *
	 * A valid surrogate pair is combined into one 4-byte sequence. A surrogate
	 * without its partner either throws (when `rejectUnpairedSurrogates` is
	 * truthy) or is encoded as U+FFFD.
	 *
	 * @return {!Uint8Array}
	 */
	jspb.binary.utf8.polyfillEncode = function (
	    /** string */ s, /** boolean */ rejectUnpairedSurrogates) {
	  let bi = 0;
	  // The worst case is 3 output bytes per UTF-16 code unit (a surrogate pair
	  // is 2 units and yields 4 bytes), so we allocate for this. This assumes
	  // that the buffer will be short lived. Callers can always `slice` if needed.
	  const buffer = new Uint8Array(3 * s.length);
	  for (let ci = 0; ci < s.length; ci++) {
	    let c = s.charCodeAt(ci);
	    if (c < 0x80) {
	      buffer[bi++] = c;
	    } else if (c < 0x800) {
	      buffer[bi++] = (c >> 6) | 0xC0;
	      buffer[bi++] = (c & 63) | 0x80;
	    } else {
	      jspb.asserts.assert(c < 65536);
	      // Look for surrogates
	      // First check if it is surrogate range
	      if (c >= MIN_SURROGATE && c <= MAX_SURROGATE) {
	        // is it a high surrogate?
	        if (c <= MAX_HIGH_SURROGATE && ci < s.length) {
	          // Past the end of the string `charCodeAt` yields NaN, which fails
	          // the low-surrogate range test below.
	          const c2 = s.charCodeAt(++ci);
	          if (c2 >= MIN_LOW_SURROGATE && c2 <= MAX_LOW_SURROGATE) {
	            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
	            const codePoint =
	              (c - MIN_SURROGATE) * 0x400 + c2 - MIN_LOW_SURROGATE + 0x10000;
	            buffer[bi++] = (codePoint >> 18) | 0xF0;
	            buffer[bi++] = ((codePoint >> 12) & 63) | 0x80;
	            buffer[bi++] = ((codePoint >> 6) & 63) | 0x80;
	            buffer[bi++] = (codePoint & 63) | 0x80;
	            continue;
	          } else {
	            // else c2 not in low surrogate range, treat c as a lone surrogate
	            // and back up ci so we process c2 on the next loop as an
	            // independent character
	            ci--;
	          }
	        }  // else c not a high surrogate
	        if (rejectUnpairedSurrogates) {
	          throw new Error('Found an unpaired surrogate');
	        }
	        c = 0xFFFD;  // Error! Unpaired surrogate
	      }
	      buffer[bi++] = (c >> 12) | 0xE0;
	      buffer[bi++] = ((c >> 6) & 63) | 0x80;
	      buffer[bi++] = (c & 63) | 0x80;
	    }
	  }
	  return subarray(buffer, 0, bi);
	};

	/**
	 * True when encoding goes through `TextEncoder`: either forced by the
	 * compile-time flag or because the API was detected at load time.
	 * @const {boolean}
	 */
	const useTextEncoderEncode =
	  (USE_TEXT_ENCODING || typeof TextEncoder !== 'undefined');

	/**
	 * A utf8 encoding routine either based upon TextEncoder if available or using
	 * our polyfill implementation.
	 *
	 * `rejectUnpairedSurrogates` defaults to false and is forwarded unchanged to
	 * the selected implementation.
	 * @return {!Uint8Array}
	 */
	jspb.binary.utf8.encodeUtf8 = function (
	    /** string */ string, /** boolean= */ rejectUnpairedSurrogates = false) {
	  jspb.asserts.assertString(string);
	  return useTextEncoderEncode ?
	    jspb.binary.utf8.textEncoderEncode(string, rejectUnpairedSurrogates) :
	    jspb.binary.utf8.polyfillEncode(string, rejectUnpairedSurrogates);
	};