Tools/TestWebKitAPI/Tests/WTF/RapidHash.cpp - external/github.com/WebKit/webkit - Git at Google

 /*
  * Copyright (C) 2013-2023 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "config.h"
 #include <wtf/text/RapidHash.h>

 #include <cstdio>
 #include <wtf/text/StringHash.h>
 #include <wtf/text/StringImpl.h>
 #include <wtf/text/WTFString.h>

 namespace TestWebKitAPI {

 static const unsigned expected[256] = {
     15648162, 7379797, 2070420, 1145527, 8646054, 15241182, 9023636, 3432937, 5062235, 9411973, 8896458, 400898, 11288028, 15246497,
     16435911, 10710282, 11347848, 4926495, 5707730, 12461996, 15979074, 8192552, 4131547, 4233318, 12626894, 6755298, 7868599, 12823534,
     15827911, 14924241, 4759336, 1088812, 7488120, 15311191, 12013381, 16679728, 13118800, 13009161, 6484391, 4172168, 5167151, 817127,
     12403441, 2342506, 10144396, 11025206, 7062744, 15680250, 13612965, 869880, 4014223, 14373616, 4510356, 8595635, 5794747, 6413633,
     15160000, 8866527, 15428778, 12958164, 11046123, 13233833, 16651263, 8725559, 9176081, 14061736, 11344565, 5810756, 12458473, 3701732,
     6169408, 8744620, 13545725, 7498676, 10964582, 12211699, 6547798, 12410754, 15763191, 6742586, 12102147, 15835431, 7399542, 3934377,
     14165379, 16203007, 1429223, 8153632, 13217555, 9421910, 7722034, 804888, 13084468, 8451886, 8948988, 10345068, 15432129, 1274364,
     12502940, 4657399, 7753220, 2705153, 6018100, 9597385, 16276144, 7700322, 692044, 12527633, 3400946, 4513351, 11233477, 13209535,
     12920005, 12470278, 12416119, 5044722, 9607204, 6859191, 5691381, 15134985, 862804, 11678044, 9501460, 13797854, 9493874, 417362,
     11681400, 12943228, 6221477, 4274081, 9331847, 7330476, 9884564, 15862969, 6490954, 7551756, 13785547, 7802142, 12872755, 4615712,
     8904795, 1545520, 12927801, 5591092, 9366396, 11918163, 83670, 6325621, 8142363, 16462751, 3496810, 8595953, 1480256, 6721450,
     11516718, 11853227, 14404149, 12296203, 9395718, 7875681, 7096160, 3481091, 15127520, 8429186, 10728210, 10806655, 12191713, 14381579,
     14574049, 12323324, 2713101, 12030023, 6494449, 7096722, 10996011, 2863720, 789298, 8569965, 13120101, 13360858, 7175527, 76657,
     2755224, 2643920, 14137504, 1144911, 1672686, 10746076, 1935564, 4652328, 6334663, 15111256, 7944511, 4270370, 4440447, 4140331,
     14040329, 15929079, 13138253, 5570775, 13820680, 549163, 12115401, 14000058, 1581010, 13166429, 3102479, 12136319, 2626194, 10071173,
     14886108, 10021120, 964629, 11804913, 829378, 1004309, 6551740, 8857819, 15280313, 5586978, 1904222, 11159718, 3805985, 404066,
     3248840, 4377825, 9995210, 12087443, 2247276, 9838854, 15703402, 16102836, 11787949, 216832, 10266988, 4553076, 12253763, 924739,
     14137553, 13714506, 5107931, 4865914, 15687457, 13011562, 13781392, 15981974, 8615599, 793617, 12964706, 10511751, 7135570, 4958064,
     11359717, 16212004, 13049943, 13585204
 };

 TEST(WTF, RapidHasher)
 {
     auto generateLatin1Array = [&](size_t size) {
         auto array = std::unique_ptr<Latin1Character[]>(new Latin1Character[size]);
         for (size_t i = 0; i < size; i++)
             array[i] = i;
         return array;
     };

     auto generateUTF16Array = [&](size_t size) {
         auto array = std::unique_ptr<char16_t[]>(new char16_t[size]);
         for (size_t i = 0; i < size; i++)
             array[i] = i;
         return array;
     };

     unsigned max8Bit = std::numeric_limits<uint8_t>::max();
     for (size_t size = 0; size <= max8Bit; size++) {
         std::unique_ptr<const Latin1Character[]> arr1 = generateLatin1Array(size);
         std::unique_ptr<const char16_t[]> arr2 = generateUTF16Array(size);
         unsigned left = RapidHash::computeHashAndMaskTop8Bits(std::span { arr1.get(), size });
         unsigned right = RapidHash::computeHashAndMaskTop8Bits(std::span { arr2.get(), size });
         ASSERT_EQ(left, right);
         ASSERT_EQ(left, expected[size]);
     }
 }

 // Exercise the long-string branches in rapidhashCore (>16, >48, >96 bytes)
 // across the full range of sizes used by real-world strings, and confirm the
 // 8-bit vs 16-bit Latin1 invariant still holds.
 TEST(WTF, RapidHasher_LongStringInvariant)
 {
     constexpr size_t maxSize = 512;
     auto arr1Mut = std::unique_ptr<Latin1Character[]>(new Latin1Character[maxSize]);
     auto arr2Mut = std::unique_ptr<char16_t[]>(new char16_t[maxSize]);
     for (size_t i = 0; i < maxSize; ++i) {
         arr1Mut[i] = static_cast<Latin1Character>(i & 0xFF);
         arr2Mut[i] = static_cast<char16_t>(i & 0xFF);
     }
     const Latin1Character* arr1 = arr1Mut.get();
     const char16_t* arr2 = arr2Mut.get();
     for (size_t size = 0; size <= maxSize; ++size) {
         unsigned left = RapidHash::computeHashAndMaskTop8Bits(std::span { arr1, size });
         unsigned right = RapidHash::computeHashAndMaskTop8Bits(std::span { arr2, size });
         ASSERT_EQ(left, right) << "size=" << size;
     }
 }

 // A hash computed at compile time over a char16_t literal must equal both:
 // (a) the hash computed at runtime over the same char16_t buffer, and
 // (b) the hash of the equivalent Latin1 byte sequence (constexpr or runtime).
 // Pre-fix, the constexpr 16-bit path used rapidhashRawBytes16 (len=2N) while
 // runtime used rapidhashCompressed16 (len=N), so this would diverge.
 TEST(WTF, RapidHasher_ConstexprMatchesRuntime)
 {
     auto check = [](auto& literal8, auto& literal16) {
         constexpr size_t length = std::extent_v<std::remove_reference_t<decltype(literal8)>> - 1;
         static_assert(std::extent_v<std::remove_reference_t<decltype(literal16)>> - 1 == length);

         unsigned constexprHash16 = RapidHash::computeHashAndMaskTop8Bits<char16_t>(std::span { literal16, length });
         unsigned runtimeHash16 = RapidHash::computeHashAndMaskTop8Bits(std::span<const char16_t> { literal16, length });
         unsigned runtimeHash8 = RapidHash::computeHashAndMaskTop8Bits(std::span { reinterpret_cast<const Latin1Character*>(literal8), length });
         EXPECT_EQ(constexprHash16, runtimeHash16) << "length=" << length;
         EXPECT_EQ(constexprHash16, runtimeHash8) << "length=" << length;
     };

     {
         static constexpr char literal8[] = "x";
         static constexpr char16_t literal16[] = u"x";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "ab";
         static constexpr char16_t literal16[] = u"ab";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "abc";
         static constexpr char16_t literal16[] = u"abc";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "abcd";
         static constexpr char16_t literal16[] = u"abcd";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "abcde";
         static constexpr char16_t literal16[] = u"abcde";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "abcdefg";
         static constexpr char16_t literal16[] = u"abcdefg";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "abcdefgh";
         static constexpr char16_t literal16[] = u"abcdefgh";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "0123456789abcdef";
         static constexpr char16_t literal16[] = u"0123456789abcdef";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "0123456789abcdefg";
         static constexpr char16_t literal16[] = u"0123456789abcdefg";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "0123456789abcdef0123456789abcdef0123456789abcdef";
         static constexpr char16_t literal16[] = u"0123456789abcdef0123456789abcdef0123456789abcdef";
         check(literal8, literal16);
     }
     {
         static constexpr char literal8[] = "0123456789abcdef0123456789abcdef0123456789abcdef!";
         static constexpr char16_t literal16[] = u"0123456789abcdef0123456789abcdef0123456789abcdef!";
         check(literal8, literal16);
     }
     {
         // 64 chars
         static constexpr char literal8[] = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
         static constexpr char16_t literal16[] = u"0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
         check(literal8, literal16);
     }
     {
         // 100 chars
         static constexpr char literal8[] = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdefABCD";
         static constexpr char16_t literal16[] = u"0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdefABCD";
         check(literal8, literal16);
     }
 }

 // Non-Latin1 char16_t hashed at constexpr must match runtime (no 8-bit
 // equivalent; we only assert the constexpr-vs-runtime invariant).
 TEST(WTF, RapidHasher_NonLatin1ConstexprMatchesRuntime)
 {
     static constexpr char16_t mixed[] = u"a\u00A3\u1234b\u4E2D\u6587\u0041";
     constexpr size_t length = std::extent_v<decltype(mixed)> - 1;
     unsigned constexprHash = RapidHash::computeHashAndMaskTop8Bits<char16_t>(std::span { mixed, length });
     unsigned runtimeHash = RapidHash::computeHashAndMaskTop8Bits(std::span<const char16_t> { mixed, length });
     EXPECT_EQ(constexprHash, runtimeHash);
 }

 // ASCIICaseInsensitiveHash is the primary user of the custom-Converter code
 // path (its FoldCase::convert(c) returns toASCIILower(c)). Verify that:
 //   - the same content hashes identically regardless of case ("hello" == "HELLO").
 //   - 8-bit and 16-bit storage with the same ASCII content hash identically.
 TEST(WTF, RapidHasher_ASCIICaseInsensitive_8Bit_16Bit_Match)
 {
     std::array<Latin1Character, 5> lower8 { 'h', 'e', 'l', 'l', 'o' };
     std::array<Latin1Character, 5> upper8 { 'H', 'E', 'L', 'L', 'O' };
     std::array<char16_t, 5> lower16 { u'h', u'e', u'l', u'l', u'o' };
     std::array<char16_t, 5> upper16 { u'H', u'E', u'L', u'L', u'O' };

     unsigned h1 = ASCIICaseInsensitiveHash::hash(std::span<const Latin1Character> { lower8 });
     unsigned h2 = ASCIICaseInsensitiveHash::hash(std::span<const Latin1Character> { upper8 });
     unsigned h3 = ASCIICaseInsensitiveHash::hash(std::span<const char16_t> { lower16 });
     unsigned h4 = ASCIICaseInsensitiveHash::hash(std::span<const char16_t> { upper16 });

     EXPECT_EQ(h1, h2);
     EXPECT_EQ(h1, h3);
     EXPECT_EQ(h1, h4);
 }

 // Exercise the custom-Converter scalar OR-fold path at sizes that hit each
 // rapidhashImpl branch (< 4, < 16, < 48, > 48).
 TEST(WTF, RapidHasher_ASCIICaseInsensitive_SizeCoverage)
 {
     for (size_t size : { 1u, 3u, 7u, 15u, 16u, 17u, 48u, 49u, 96u, 128u }) {
         auto lower8 = std::unique_ptr<Latin1Character[]>(new Latin1Character[size]);
         auto upper8 = std::unique_ptr<Latin1Character[]>(new Latin1Character[size]);
         auto lower16 = std::unique_ptr<char16_t[]>(new char16_t[size]);
         auto upper16 = std::unique_ptr<char16_t[]>(new char16_t[size]);
         for (size_t i = 0; i < size; ++i) {
             char lo = 'a' + static_cast<char>(i % 26);
             char up = 'A' + static_cast<char>(i % 26);
             lower8[i] = static_cast<Latin1Character>(lo);
             upper8[i] = static_cast<Latin1Character>(up);
             lower16[i] = static_cast<char16_t>(lo);
             upper16[i] = static_cast<char16_t>(up);
         }
         unsigned h1 = ASCIICaseInsensitiveHash::hash(std::span<const Latin1Character> { lower8.get(), size });
         unsigned h2 = ASCIICaseInsensitiveHash::hash(std::span<const Latin1Character> { upper8.get(), size });
         unsigned h3 = ASCIICaseInsensitiveHash::hash(std::span<const char16_t> { lower16.get(), size });
         unsigned h4 = ASCIICaseInsensitiveHash::hash(std::span<const char16_t> { upper16.get(), size });
         EXPECT_EQ(h1, h2) << "size=" << size;
         EXPECT_EQ(h1, h3) << "size=" << size;
         EXPECT_EQ(h1, h4) << "size=" << size;
     }
 }

 // Core invariant: a Latin1 string and the same content stored as char16_t must
 // produce identical hashes, whether hashed via the low-level hasher or via the
 // public StringImpl / String APIs. Exercises a range of realistic inputs and
 // sizes that cover every branch of rapidhashImpl (<4, 4-16, 17-48, 49-96, >96)
 // and both the short and SIMD paths of the Latin1 scan.
 TEST(WTF, RapidHasher_SameContentDifferentWidthSameHash)
 {
     struct Case {
         const char* ascii;
     } cases[] = {
         { "" },
         { "a" },
         { "ab" },
         { "abc" },
         { "abcd" },
         { "Hello" },
         { "html" },
         { "charset" },
         { "Content-Type" },
         { "0123456789abcdef" }, // exactly 16
         { "0123456789abcdefg" }, // 17 — crosses <=16 branch
         { "The quick brown fox jumps over the lazy dog" }, // 43
         { "0123456789abcdef0123456789abcdef0123456789abcdef" }, // 48
         { "0123456789abcdef0123456789abcdef0123456789abcdef!" }, // 49
         // > 96 chars — exercises the unrolled triple-accumulator loop
         { "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef!" },
     };

     for (auto& c : cases) {
         size_t length = std::strlen(c.ascii);

         // 1. Raw hasher: Latin1 buffer vs char16_t buffer with same content.
         Vector<Latin1Character> buf8(length);
         Vector<char16_t> buf16(length);
         for (size_t i = 0; i < length; ++i) {
             buf8[i] = static_cast<Latin1Character>(c.ascii[i]);
             buf16[i] = static_cast<char16_t>(c.ascii[i]);
         }
         unsigned raw8 = RapidHash::computeHashAndMaskTop8Bits(buf8.span());
         unsigned raw16 = RapidHash::computeHashAndMaskTop8Bits<char16_t>(buf16.span());
         EXPECT_EQ(raw8, raw16) << "ascii=\"" << c.ascii << "\" length=" << length;

         // 2. StringImpl-level: build a single-byte StringImpl and a 16-bit
         //    StringImpl with the same content and compare their hashes.
         //    StringImpl::hash() caches, so build fresh impls each iteration.
         RefPtr<StringImpl> s8 = StringImpl::create(buf8.span());
         RefPtr<StringImpl> s16 = StringImpl::create8BitIfPossible(buf16.span());
         // Verify we actually got different widths (unless the string is empty).
         if (length) {
             // s8 is always 8-bit. s16 created via create8BitIfPossible from 16-bit
             // data that happens to be all-Latin1 will also be downgraded to 8-bit,
             // which is fine for hash equivalence — both impls then share the 8-bit
             // path. To explicitly exercise the 16-bit storage path, build s16 via
             // StringImpl::create(std::span<const char16_t>) which keeps it 16-bit.
             RefPtr<StringImpl> s16Wide = StringImpl::create(buf16.span());
             EXPECT_FALSE(s16Wide->is8Bit());
             EXPECT_EQ(s8->hash(), s16Wide->hash())
                 << "ascii=\"" << c.ascii << "\" (StringImpl, 8-bit vs forced-16-bit)";
         }
         EXPECT_EQ(s8->hash(), s16->hash())
             << "ascii=\"" << c.ascii << "\" (StringImpl, create8BitIfPossible)";
     }
 }

 // Verify that WebCore's build-time hashers (Source/JavaScriptCore/yarr/hasher.py
 // and Source/WebCore/bindings/scripts/Hasher.pm) produce the same value as the
 // C++ runtime for the same ASCII input. The reference values below were
 // generated by running Source/JavaScriptCore/yarr/hasher.py::stringHash on
 // strings of varying length that cover every branch of rapidhashImpl
 // (len <= 3, <= 16, <= 48, <= 96, > 96). If this test fails, one of the four
 // implementations (C++, Python, Perl Hasher.pm, Perl create_hash_table) has
 // drifted from the others — regenerate all hashers together and rerun.
 TEST(WTF, RapidHasher_BuildTimeHashersMatchRuntime)
 {
     auto genString = [](size_t length) {
         std::string s;
         s.reserve(length);
         for (size_t i = 0; i < length; ++i)
             s.push_back(static_cast<char>((i * 7 + 33) % 127));
         return s;
     };

     struct Case {
         size_t length;
         unsigned expected;
     };
     Case cases[] = {
         { 0, 15648162 },
         { 1, 1359507 },
         { 2, 16250124 },
         { 3, 7253243 },
         { 4, 293296 },
         { 5, 15527655 },
         { 7, 3511038 },
         { 8, 12660779 },
         { 15, 11872380 },
         { 16, 12289576 },
         { 17, 10542644 },
         { 23, 2152857 },
         { 24, 12685428 },
         { 32, 7214394 },
         { 47, 10434199 },
         { 48, 2948560 },
         { 49, 605845 },
         { 63, 16717952 },
         { 64, 6459137 },
         { 96, 1178325 },
         { 97, 10876528 },
         { 128, 16479986 },
         { 256, 3145085 },
     };

     for (auto& c : cases) {
         auto s = genString(c.length);
         unsigned h = RapidHash::computeHashAndMaskTop8Bits(std::span<const Latin1Character>(
             reinterpret_cast<const Latin1Character*>(s.data()), s.size()));
         EXPECT_EQ(h, c.expected) << "length=" << c.length;
     }
 }

 } // namespace TestWebKitAPI
	/*
	* Copyright (C) 2013-2023 Apple Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
	* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	* THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "config.h"
	#include <wtf/text/RapidHash.h>

	#include <cstdio>
	#include <wtf/text/StringHash.h>
	#include <wtf/text/StringImpl.h>
	#include <wtf/text/WTFString.h>

	namespace TestWebKitAPI {

	static const unsigned expected[256] = {
	15648162, 7379797, 2070420, 1145527, 8646054, 15241182, 9023636, 3432937, 5062235, 9411973, 8896458, 400898, 11288028, 15246497,
	16435911, 10710282, 11347848, 4926495, 5707730, 12461996, 15979074, 8192552, 4131547, 4233318, 12626894, 6755298, 7868599, 12823534,
	15827911, 14924241, 4759336, 1088812, 7488120, 15311191, 12013381, 16679728, 13118800, 13009161, 6484391, 4172168, 5167151, 817127,
	12403441, 2342506, 10144396, 11025206, 7062744, 15680250, 13612965, 869880, 4014223, 14373616, 4510356, 8595635, 5794747, 6413633,
	15160000, 8866527, 15428778, 12958164, 11046123, 13233833, 16651263, 8725559, 9176081, 14061736, 11344565, 5810756, 12458473, 3701732,
	6169408, 8744620, 13545725, 7498676, 10964582, 12211699, 6547798, 12410754, 15763191, 6742586, 12102147, 15835431, 7399542, 3934377,
	14165379, 16203007, 1429223, 8153632, 13217555, 9421910, 7722034, 804888, 13084468, 8451886, 8948988, 10345068, 15432129, 1274364,
	12502940, 4657399, 7753220, 2705153, 6018100, 9597385, 16276144, 7700322, 692044, 12527633, 3400946, 4513351, 11233477, 13209535,
	12920005, 12470278, 12416119, 5044722, 9607204, 6859191, 5691381, 15134985, 862804, 11678044, 9501460, 13797854, 9493874, 417362,
	11681400, 12943228, 6221477, 4274081, 9331847, 7330476, 9884564, 15862969, 6490954, 7551756, 13785547, 7802142, 12872755, 4615712,
	8904795, 1545520, 12927801, 5591092, 9366396, 11918163, 83670, 6325621, 8142363, 16462751, 3496810, 8595953, 1480256, 6721450,
	11516718, 11853227, 14404149, 12296203, 9395718, 7875681, 7096160, 3481091, 15127520, 8429186, 10728210, 10806655, 12191713, 14381579,
	14574049, 12323324, 2713101, 12030023, 6494449, 7096722, 10996011, 2863720, 789298, 8569965, 13120101, 13360858, 7175527, 76657,
	2755224, 2643920, 14137504, 1144911, 1672686, 10746076, 1935564, 4652328, 6334663, 15111256, 7944511, 4270370, 4440447, 4140331,
	14040329, 15929079, 13138253, 5570775, 13820680, 549163, 12115401, 14000058, 1581010, 13166429, 3102479, 12136319, 2626194, 10071173,
	14886108, 10021120, 964629, 11804913, 829378, 1004309, 6551740, 8857819, 15280313, 5586978, 1904222, 11159718, 3805985, 404066,
	3248840, 4377825, 9995210, 12087443, 2247276, 9838854, 15703402, 16102836, 11787949, 216832, 10266988, 4553076, 12253763, 924739,
	14137553, 13714506, 5107931, 4865914, 15687457, 13011562, 13781392, 15981974, 8615599, 793617, 12964706, 10511751, 7135570, 4958064,
	11359717, 16212004, 13049943, 13585204
	};

	TEST(WTF, RapidHasher)
	{
	auto generateLatin1Array = [&](size_t size) {
	auto array = std::unique_ptr<Latin1Character[]>(new Latin1Character[size]);
	for (size_t i = 0; i < size; i++)
	array[i] = i;
	return array;
	};

	auto generateUTF16Array = [&](size_t size) {
	auto array = std::unique_ptr<char16_t[]>(new char16_t[size]);
	for (size_t i = 0; i < size; i++)
	array[i] = i;
	return array;
	};

	unsigned max8Bit = std::numeric_limits<uint8_t>::max();
	for (size_t size = 0; size <= max8Bit; size++) {
	std::unique_ptr<const Latin1Character[]> arr1 = generateLatin1Array(size);
	std::unique_ptr<const char16_t[]> arr2 = generateUTF16Array(size);
	unsigned left = RapidHash::computeHashAndMaskTop8Bits(std::span { arr1.get(), size });
	unsigned right = RapidHash::computeHashAndMaskTop8Bits(std::span { arr2.get(), size });
	ASSERT_EQ(left, right);
	ASSERT_EQ(left, expected[size]);
	}
	}

	// Exercise the long-string branches in rapidhashCore (>16, >48, >96 bytes)
	// across the full range of sizes used by real-world strings, and confirm the
	// 8-bit vs 16-bit Latin1 invariant still holds.
	TEST(WTF, RapidHasher_LongStringInvariant)
	{
	constexpr size_t maxSize = 512;
	auto arr1Mut = std::unique_ptr<Latin1Character[]>(new Latin1Character[maxSize]);
	auto arr2Mut = std::unique_ptr<char16_t[]>(new char16_t[maxSize]);
	for (size_t i = 0; i < maxSize; ++i) {
	arr1Mut[i] = static_cast<Latin1Character>(i & 0xFF);
	arr2Mut[i] = static_cast<char16_t>(i & 0xFF);
	}
	const Latin1Character* arr1 = arr1Mut.get();
	const char16_t* arr2 = arr2Mut.get();
	for (size_t size = 0; size <= maxSize; ++size) {
	unsigned left = RapidHash::computeHashAndMaskTop8Bits(std::span { arr1, size });
	unsigned right = RapidHash::computeHashAndMaskTop8Bits(std::span { arr2, size });
	ASSERT_EQ(left, right) << "size=" << size;
	}
	}

	// A hash computed at compile time over a char16_t literal must equal both:
	// (a) the hash computed at runtime over the same char16_t buffer, and
	// (b) the hash of the equivalent Latin1 byte sequence (constexpr or runtime).
	// Pre-fix, the constexpr 16-bit path used rapidhashRawBytes16 (len=2N) while
	// runtime used rapidhashCompressed16 (len=N), so this would diverge.
	TEST(WTF, RapidHasher_ConstexprMatchesRuntime)
	{
	auto check = [](auto& literal8, auto& literal16) {
	constexpr size_t length = std::extent_v<std::remove_reference_t<decltype(literal8)>> - 1;
	static_assert(std::extent_v<std::remove_reference_t<decltype(literal16)>> - 1 == length);

	unsigned constexprHash16 = RapidHash::computeHashAndMaskTop8Bits<char16_t>(std::span { literal16, length });
	unsigned runtimeHash16 = RapidHash::computeHashAndMaskTop8Bits(std::span<const char16_t> { literal16, length });
	unsigned runtimeHash8 = RapidHash::computeHashAndMaskTop8Bits(std::span { reinterpret_cast<const Latin1Character*>(literal8), length });
	EXPECT_EQ(constexprHash16, runtimeHash16) << "length=" << length;
	EXPECT_EQ(constexprHash16, runtimeHash8) << "length=" << length;
	};

	{
	static constexpr char literal8[] = "x";
	static constexpr char16_t literal16[] = u"x";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "ab";
	static constexpr char16_t literal16[] = u"ab";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "abc";
	static constexpr char16_t literal16[] = u"abc";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "abcd";
	static constexpr char16_t literal16[] = u"abcd";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "abcde";
	static constexpr char16_t literal16[] = u"abcde";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "abcdefg";
	static constexpr char16_t literal16[] = u"abcdefg";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "abcdefgh";
	static constexpr char16_t literal16[] = u"abcdefgh";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "0123456789abcdef";
	static constexpr char16_t literal16[] = u"0123456789abcdef";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "0123456789abcdefg";
	static constexpr char16_t literal16[] = u"0123456789abcdefg";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "0123456789abcdef0123456789abcdef0123456789abcdef";
	static constexpr char16_t literal16[] = u"0123456789abcdef0123456789abcdef0123456789abcdef";
	check(literal8, literal16);
	}
	{
	static constexpr char literal8[] = "0123456789abcdef0123456789abcdef0123456789abcdef!";
	static constexpr char16_t literal16[] = u"0123456789abcdef0123456789abcdef0123456789abcdef!";
	check(literal8, literal16);
	}
	{
	// 64 chars
	static constexpr char literal8[] = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
	static constexpr char16_t literal16[] = u"0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
	check(literal8, literal16);
	}
	{
	// 100 chars
	static constexpr char literal8[] = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdefABCD";
	static constexpr char16_t literal16[] = u"0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdefABCD";
	check(literal8, literal16);
	}
	}

	// Non-Latin1 char16_t hashed at constexpr must match runtime (no 8-bit
	// equivalent; we only assert the constexpr-vs-runtime invariant).
	TEST(WTF, RapidHasher_NonLatin1ConstexprMatchesRuntime)
	{
	static constexpr char16_t mixed[] = u"a\u00A3\u1234b\u4E2D\u6587\u0041";
	constexpr size_t length = std::extent_v<decltype(mixed)> - 1;
	unsigned constexprHash = RapidHash::computeHashAndMaskTop8Bits<char16_t>(std::span { mixed, length });
	unsigned runtimeHash = RapidHash::computeHashAndMaskTop8Bits(std::span<const char16_t> { mixed, length });
	EXPECT_EQ(constexprHash, runtimeHash);
	}

	// ASCIICaseInsensitiveHash is the primary user of the custom-Converter code
	// path (its FoldCase::convert(c) returns toASCIILower(c)). Verify that:
	// - the same content hashes identically regardless of case ("hello" == "HELLO").
	// - 8-bit and 16-bit storage with the same ASCII content hash identically.
	TEST(WTF, RapidHasher_ASCIICaseInsensitive_8Bit_16Bit_Match)
	{
	std::array<Latin1Character, 5> lower8 { 'h', 'e', 'l', 'l', 'o' };
	std::array<Latin1Character, 5> upper8 { 'H', 'E', 'L', 'L', 'O' };
	std::array<char16_t, 5> lower16 { u'h', u'e', u'l', u'l', u'o' };
	std::array<char16_t, 5> upper16 { u'H', u'E', u'L', u'L', u'O' };

	unsigned h1 = ASCIICaseInsensitiveHash::hash(std::span<const Latin1Character> { lower8 });
	unsigned h2 = ASCIICaseInsensitiveHash::hash(std::span<const Latin1Character> { upper8 });
	unsigned h3 = ASCIICaseInsensitiveHash::hash(std::span<const char16_t> { lower16 });
	unsigned h4 = ASCIICaseInsensitiveHash::hash(std::span<const char16_t> { upper16 });

	EXPECT_EQ(h1, h2);
	EXPECT_EQ(h1, h3);
	EXPECT_EQ(h1, h4);
	}

	// Exercise the custom-Converter scalar OR-fold path at sizes that hit each
	// rapidhashImpl branch (< 4, < 16, < 48, > 48).
	TEST(WTF, RapidHasher_ASCIICaseInsensitive_SizeCoverage)
	{
	for (size_t size : { 1u, 3u, 7u, 15u, 16u, 17u, 48u, 49u, 96u, 128u }) {
	auto lower8 = std::unique_ptr<Latin1Character[]>(new Latin1Character[size]);
	auto upper8 = std::unique_ptr<Latin1Character[]>(new Latin1Character[size]);
	auto lower16 = std::unique_ptr<char16_t[]>(new char16_t[size]);
	auto upper16 = std::unique_ptr<char16_t[]>(new char16_t[size]);
	for (size_t i = 0; i < size; ++i) {
	char lo = 'a' + static_cast<char>(i % 26);
	char up = 'A' + static_cast<char>(i % 26);
	lower8[i] = static_cast<Latin1Character>(lo);
	upper8[i] = static_cast<Latin1Character>(up);
	lower16[i] = static_cast<char16_t>(lo);
	upper16[i] = static_cast<char16_t>(up);
	}
	unsigned h1 = ASCIICaseInsensitiveHash::hash(std::span<const Latin1Character> { lower8.get(), size });
	unsigned h2 = ASCIICaseInsensitiveHash::hash(std::span<const Latin1Character> { upper8.get(), size });
	unsigned h3 = ASCIICaseInsensitiveHash::hash(std::span<const char16_t> { lower16.get(), size });
	unsigned h4 = ASCIICaseInsensitiveHash::hash(std::span<const char16_t> { upper16.get(), size });
	EXPECT_EQ(h1, h2) << "size=" << size;
	EXPECT_EQ(h1, h3) << "size=" << size;
	EXPECT_EQ(h1, h4) << "size=" << size;
	}
	}

	// Core invariant: a Latin1 string and the same content stored as char16_t must
	// produce identical hashes, whether hashed via the low-level hasher or via the
	// public StringImpl / String APIs. Exercises a range of realistic inputs and
	// sizes that cover every branch of rapidhashImpl (<4, 4-16, 17-48, 49-96, >96)
	// and both the short and SIMD paths of the Latin1 scan.
	TEST(WTF, RapidHasher_SameContentDifferentWidthSameHash)
	{
	struct Case {
	const char* ascii;
	} cases[] = {
	{ "" },
	{ "a" },
	{ "ab" },
	{ "abc" },
	{ "abcd" },
	{ "Hello" },
	{ "html" },
	{ "charset" },
	{ "Content-Type" },
	{ "0123456789abcdef" }, // exactly 16
	{ "0123456789abcdefg" }, // 17 — crosses <=16 branch
	{ "The quick brown fox jumps over the lazy dog" }, // 43
	{ "0123456789abcdef0123456789abcdef0123456789abcdef" }, // 48
	{ "0123456789abcdef0123456789abcdef0123456789abcdef!" }, // 49
	// > 96 chars — exercises the unrolled triple-accumulator loop
	{ "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef!" },
	};

	for (auto& c : cases) {
	size_t length = std::strlen(c.ascii);

	// 1. Raw hasher: Latin1 buffer vs char16_t buffer with same content.
	Vector<Latin1Character> buf8(length);
	Vector<char16_t> buf16(length);
	for (size_t i = 0; i < length; ++i) {
	buf8[i] = static_cast<Latin1Character>(c.ascii[i]);
	buf16[i] = static_cast<char16_t>(c.ascii[i]);
	}
	unsigned raw8 = RapidHash::computeHashAndMaskTop8Bits(buf8.span());
	unsigned raw16 = RapidHash::computeHashAndMaskTop8Bits<char16_t>(buf16.span());
	EXPECT_EQ(raw8, raw16) << "ascii=\"" << c.ascii << "\" length=" << length;

	// 2. StringImpl-level: build a single-byte StringImpl and a 16-bit
	// StringImpl with the same content and compare their hashes.
	// StringImpl::hash() caches, so build fresh impls each iteration.
	RefPtr<StringImpl> s8 = StringImpl::create(buf8.span());
	RefPtr<StringImpl> s16 = StringImpl::create8BitIfPossible(buf16.span());
	// Verify we actually got different widths (unless the string is empty).
	if (length) {
	// s8 is always 8-bit. s16 created via create8BitIfPossible from 16-bit
	// data that happens to be all-Latin1 will also be downgraded to 8-bit,
	// which is fine for hash equivalence — both impls then share the 8-bit
	// path. To explicitly exercise the 16-bit storage path, build s16 via
	// StringImpl::create(std::span<const char16_t>) which keeps it 16-bit.
	RefPtr<StringImpl> s16Wide = StringImpl::create(buf16.span());
	EXPECT_FALSE(s16Wide->is8Bit());
	EXPECT_EQ(s8->hash(), s16Wide->hash())
	<< "ascii=\"" << c.ascii << "\" (StringImpl, 8-bit vs forced-16-bit)";
	}
	EXPECT_EQ(s8->hash(), s16->hash())
	<< "ascii=\"" << c.ascii << "\" (StringImpl, create8BitIfPossible)";
	}
	}

	// Verify that WebCore's build-time hashers (Source/JavaScriptCore/yarr/hasher.py
	// and Source/WebCore/bindings/scripts/Hasher.pm) produce the same value as the
	// C++ runtime for the same ASCII input. The reference values below were
	// generated by running Source/JavaScriptCore/yarr/hasher.py::stringHash on
	// strings of varying length that cover every branch of rapidhashImpl
	// (len <= 3, <= 16, <= 48, <= 96, > 96). If this test fails, one of the four
	// implementations (C++, Python, Perl Hasher.pm, Perl create_hash_table) has
	// drifted from the others — regenerate all hashers together and rerun.
	TEST(WTF, RapidHasher_BuildTimeHashersMatchRuntime)
	{
	auto genString = [](size_t length) {
	std::string s;
	s.reserve(length);
	for (size_t i = 0; i < length; ++i)
	s.push_back(static_cast<char>((i * 7 + 33) % 127));
	return s;
	};

	struct Case {
	size_t length;
	unsigned expected;
	};
	Case cases[] = {
	{ 0, 15648162 },
	{ 1, 1359507 },
	{ 2, 16250124 },
	{ 3, 7253243 },
	{ 4, 293296 },
	{ 5, 15527655 },
	{ 7, 3511038 },
	{ 8, 12660779 },
	{ 15, 11872380 },
	{ 16, 12289576 },
	{ 17, 10542644 },
	{ 23, 2152857 },
	{ 24, 12685428 },
	{ 32, 7214394 },
	{ 47, 10434199 },
	{ 48, 2948560 },
	{ 49, 605845 },
	{ 63, 16717952 },
	{ 64, 6459137 },
	{ 96, 1178325 },
	{ 97, 10876528 },
	{ 128, 16479986 },
	{ 256, 3145085 },
	};

	for (auto& c : cases) {
	auto s = genString(c.length);
	unsigned h = RapidHash::computeHashAndMaskTop8Bits(std::span<const Latin1Character>(
	reinterpret_cast<const Latin1Character*>(s.data()), s.size()));
	EXPECT_EQ(h, c.expected) << "length=" << c.length;
	}
	}

	} // namespace TestWebKitAPI