diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 065cfd84a6..46e864af93 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -1,4 +1,5 @@ ABANDONFONT +ABCDEFGHIJKLMNOPQRSTUVWXY abgr abi ACCESSTOKEN @@ -42,7 +43,6 @@ antialias antialiasing ANull anycpu -AOn APARTMENTTHREADED APCs api @@ -80,7 +80,6 @@ ASingle asm asmv asmx -AStomps ASYNCWINDOWPOS atch ATest @@ -232,7 +231,6 @@ chcp checkbox checkboxes chh -Childitem chk chrono CHT @@ -669,6 +667,7 @@ ECH echokey ecount ECpp +ect Edgium EDITKEYS EDITTEXT @@ -702,6 +701,7 @@ ENUMLOGFONTEX enumranges envvar eol +eplace EPres EQU ERASEBKGND @@ -779,7 +779,6 @@ FIXEDCONVERTED FIXEDFILEINFO Flg flyout -fmix fmodern fmtarg fmtid @@ -996,6 +995,7 @@ HPR HProvider HREDRAW hresult +hrottled HRSRC hscroll hsl @@ -1030,6 +1030,7 @@ ICache icacls iccex IChar +icket ico IComponent ICONERROR @@ -2431,6 +2432,8 @@ uint uintptr ulcch ulong +umul +umulh Unadvise unattend uncomment @@ -2735,6 +2738,9 @@ WUX WVerify WWith wxh +wyhash +wymix +wyr xact xaml Xamlmeta @@ -2795,6 +2801,7 @@ YSize YSubstantial YVIRTUALSCREEN YWalk +Zabcdefghijklmnopqrstuvwxyz ZCmd ZCtrl zsh diff --git a/NOTICE.md b/NOTICE.md index 5c153f6ab6..e4a1a694a2 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -276,6 +276,39 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ``` +## wyhash + +**Source**: [https://github.com/wangyi-fudan/wyhash](https://github.com/wangyi-fudan/wyhash) + +### License + +``` +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. 
+ +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> +``` + ## ConEmu **Source**: [https://github.com/Maximus5/ConEmu](https://github.com/Maximus5/ConEmu) diff --git a/oss/wyhash/LICENSE b/oss/wyhash/LICENSE new file mode 100644 index 0000000000..f223c03afe --- /dev/null +++ b/oss/wyhash/LICENSE @@ -0,0 +1,25 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> + diff --git a/oss/wyhash/MAINTAINER_README.md b/oss/wyhash/MAINTAINER_README.md new file mode 100644 index 0000000000..a4f827393b --- /dev/null +++ b/oss/wyhash/MAINTAINER_README.md @@ -0,0 +1,4 @@ +### Notes for Future Maintainers + +[wyhash](https://github.com/wangyi-fudan/wyhash) is used as the hash algorithm for `<til/hash.h>` and its `til::hasher`. +The source code was directly integrated into that header file and can be found in `/src/inc/til/hash.h`. diff --git a/oss/wyhash/cgmanifest.json b/oss/wyhash/cgmanifest.json new file mode 100644 index 0000000000..872322ff16 --- /dev/null +++ b/oss/wyhash/cgmanifest.json @@ -0,0 +1,14 @@ +{ + "Registrations": [ + { + "component": { + "type": "git", + "git": { + "repositoryUrl": "https://github.com/wangyi-fudan/wyhash", + "commitHash": "e77036ac1943369dc03e611cde52a8570f8ceefe" + } + } + } + ], + "Version": 1 +} \ No newline at end of file diff --git a/src/buffer/out/UnicodeStorage.hpp b/src/buffer/out/UnicodeStorage.hpp index e733013197..609338dca5 100644 --- a/src/buffer/out/UnicodeStorage.hpp +++ b/src/buffer/out/UnicodeStorage.hpp @@ -17,7 +17,6 @@ Author(s): #include #include -#include #include // std::unordered_map needs help to know how to hash a til::point @@ -33,9 +32,9 @@ namespace std // - coord - the coord to hash // Return Value: // - the hashed coord - constexpr size_t operator()(const til::point coord) const noexcept + size_t operator()(const til::point coord) const noexcept { - return til::hash(til::bit_cast(coord)); + return 
til::hash(coord); } }; } diff --git a/src/inc/til/hash.h b/src/inc/til/hash.h index 7b7c9c4b53..49e9e57795 100644 --- a/src/inc/til/hash.h +++ b/src/inc/til/hash.h @@ -3,7 +3,25 @@ #pragma once -#include "bit.h" +#pragma warning(push) +// std::hash() doesn't test for `nullptr`, nor do we want to. +#pragma warning(disable : 26429) // Symbol '...' is never tested for nullness, it can be marked as not_null (f.23). +// Misdiagnosis: static_cast is used to differentiate between 2 overloads of til::hasher::write. +#pragma warning(disable : 26474) // Don't cast between pointer types when the conversion could be implicit (type.1). +// We don't want to unnecessarily modify wyhash from its original. +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). +#pragma warning(disable : 26494) // Variable '...' is uninitialized. Always initialize an object (type.5). +#pragma warning(disable : 26496) // The variable '...' does not change after construction, mark it as const (con.4). + +#if defined(_M_X64) && !defined(_M_ARM64EC) +#define TIL_HASH_X64 +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +#define TIL_HASH_ARM64 +#elif defined(_M_IX86) +#define TIL_HASH_X86 +#else +#error "Unsupported architecture for til::hash" +#endif namespace til { @@ -12,31 +30,27 @@ namespace til struct hasher { - explicit constexpr hasher(size_t state = FNV_offset_basis) noexcept : + constexpr hasher() = default; + explicit constexpr hasher(size_t state) noexcept : _hash{ state } {} template - constexpr void write(const T& v) noexcept + hasher& write(const T& v) noexcept { hash_trait{}(*this, v); + return *this; } template>> - constexpr void write(const T* data, size_t count) noexcept + hasher& write(const T* data, size_t count) noexcept { -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). 
- write(reinterpret_cast(data), sizeof(T) * count); + return write(static_cast(data), sizeof(T) * count); } -#pragma warning(suppress : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23). - constexpr void write(const uint8_t* data, size_t count) noexcept + hasher& write(const void* data, size_t len) noexcept { - for (size_t i = 0; i < count; ++i) - { -#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). - _hash ^= static_cast(data[i]); - _hash *= FNV_prime; - } + _hash = _wyhash(data, len, _hash); + return *this; } constexpr size_t finalize() const noexcept @@ -45,15 +59,151 @@ namespace til } private: -#if defined(_WIN64) - static constexpr size_t FNV_offset_basis = 14695981039346656037ULL; - static constexpr size_t FNV_prime = 1099511628211ULL; -#else - static constexpr size_t FNV_offset_basis = 2166136261U; - static constexpr size_t FNV_prime = 16777619U; -#endif +#if defined(TIL_HASH_X86) - size_t _hash = FNV_offset_basis; + static uint32_t _wyr24(const uint8_t* p, uint32_t k) noexcept + { + return static_cast(p[0]) << 16 | static_cast(p[k >> 1]) << 8 | p[k - 1]; + } + + static uint32_t _wyr32(const uint8_t* p) noexcept + { + uint32_t v; + memcpy(&v, p, 4); + return v; + } + + static void _wymix32(uint32_t* a, uint32_t* b) noexcept + { + uint64_t c = *a ^ UINT32_C(0x53c5ca59); + c *= *b ^ UINT32_C(0x74743c1b); + *a = static_cast(c); + *b = static_cast(c >> 32); + } + + static uint32_t _wyhash(const void* data, uint32_t len, uint32_t seed) noexcept + { + auto p = static_cast(data); + auto i = len; + auto see1 = len; + _wymix32(&seed, &see1); + + for (; i > 8; i -= 8, p += 8) + { + seed ^= _wyr32(p); + see1 ^= _wyr32(p + 4); + _wymix32(&seed, &see1); + } + if (i >= 4) + { + seed ^= _wyr32(p); + see1 ^= _wyr32(p + i - 4); + } + else if (i) + { + seed ^= _wyr24(p, i); + } + + _wymix32(&seed, &see1); + _wymix32(&seed, &see1); + return seed ^ see1; + } + +#else // defined(TIL_HASH_X86) 
+ + static uint64_t _wyr3(const uint8_t* p, size_t k) noexcept + { + return (static_cast(p[0]) << 16) | (static_cast(p[k >> 1]) << 8) | p[k - 1]; + } + + static uint64_t _wyr4(const uint8_t* p) noexcept + { + uint32_t v; + memcpy(&v, p, 4); + return v; + } + + static uint64_t _wyr8(const uint8_t* p) noexcept + { + uint64_t v; + memcpy(&v, p, 8); + return v; + } + + static uint64_t _wymix(uint64_t lhs, uint64_t rhs) noexcept + { +#if defined(TIL_HASH_X64) + uint64_t hi; + uint64_t lo = _umul128(lhs, rhs, &hi); +#elif defined(TIL_HASH_ARM64) + const uint64_t lo = lhs * rhs; + const uint64_t hi = __umulh(lhs, rhs); +#endif + return lo ^ hi; + } + + static uint64_t _wyhash(const void* data, uint64_t len, uint64_t seed) noexcept + { + static constexpr auto s0 = UINT64_C(0xa0761d6478bd642f); + static constexpr auto s1 = UINT64_C(0xe7037ed1a0b428db); + static constexpr auto s2 = UINT64_C(0x8ebc6af09c88c6e3); + static constexpr auto s3 = UINT64_C(0x589965cc75374cc3); + + auto p = static_cast(data); + seed ^= s0; + uint64_t a; + uint64_t b; + + if (len <= 16) + { + if (len >= 4) + { + a = (_wyr4(p) << 32) | _wyr4(p + ((len >> 3) << 2)); + b = (_wyr4(p + len - 4) << 32) | _wyr4(p + len - 4 - ((len >> 3) << 2)); + } + else if (len > 0) + { + a = _wyr3(p, len); + b = 0; + } + else + { + a = b = 0; + } + } + else + { + auto i = len; + if (i > 48) + { + auto seed1 = seed; + auto seed2 = seed; + do + { + seed = _wymix(_wyr8(p) ^ s1, _wyr8(p + 8) ^ seed); + seed1 = _wymix(_wyr8(p + 16) ^ s2, _wyr8(p + 24) ^ seed1); + seed2 = _wymix(_wyr8(p + 32) ^ s3, _wyr8(p + 40) ^ seed2); + p += 48; + i -= 48; + } while (i > 48); + seed ^= seed1 ^ seed2; + } + while (i > 16) + { + seed = _wymix(_wyr8(p) ^ s1, _wyr8(p + 8) ^ seed); + i -= 16; + p += 16; + } + a = _wyr8(p + i - 16); + b = _wyr8(p + i - 8); + } + + return _wymix(s1 ^ len, _wymix(a ^ s1, b ^ seed)); + } + +#endif // defined(TIL_HASH_X86) + + size_t _hash = 0; }; namespace details @@ -61,10 +211,9 @@ namespace til template struct 
conditionally_enabled_hash_trait { - constexpr void operator()(hasher& h, const T& v) const noexcept + void operator()(hasher& h, const T& v) const noexcept { -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - h.write(reinterpret_cast(&v), sizeof(T)); + h.write(static_cast(&v), sizeof(T)); } }; @@ -87,80 +236,55 @@ namespace til template<> struct hash_trait { - constexpr void operator()(hasher& h, float v) const noexcept + void operator()(hasher& h, float v) const noexcept { v = v == 0.0f ? 0.0f : v; // map -0 to 0 -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - h.write(reinterpret_cast(&v), sizeof(v)); + h.write(static_cast(&v), sizeof(v)); } }; template<> struct hash_trait { - constexpr void operator()(hasher& h, double v) const noexcept + void operator()(hasher& h, double v) const noexcept { v = v == 0.0 ? 0.0 : v; // map -0 to 0 -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - h.write(reinterpret_cast(&v), sizeof(v)); + h.write(static_cast(&v), sizeof(v)); } }; template struct hash_trait> { - constexpr void operator()(hasher& h, const std::basic_string& v) const noexcept + void operator()(hasher& h, const std::basic_string& v) const noexcept { -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - h.write(reinterpret_cast(v.data()), sizeof(T) * v.size()); + h.write(v.data(), v.size()); } }; template struct hash_trait> { - constexpr void operator()(hasher& h, const std::basic_string_view& v) const noexcept + void operator()(hasher& h, const std::basic_string_view& v) const noexcept { -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). 
- h.write(reinterpret_cast(v.data()), sizeof(T) * v.size()); + h.write(v.data(), v.size()); } }; template - constexpr size_t hash(const T& v) noexcept + size_t hash(const T& v) noexcept { - if constexpr (sizeof(T) <= sizeof(size_t) && (std::is_integral_v || std::is_enum_v)) - { - // This runs murmurhash3's finalizer (fmix32/fmix64) on a single integer. - // It's fast, public domain and produces good results. - // - // Using til::as_unsigned here allows the compiler to drop the first - // `>> 33` mix for all Ts which are >= 32 bits. - // The existence of sign extension shouldn't change hash quality. - size_t h = til::as_unsigned(v); - if constexpr (sizeof(size_t) == 4) - { - h ^= h >> 16; - h *= UINT32_C(0x85ebca6b); - h ^= h >> 13; - h *= UINT32_C(0xc2b2ae35); - h ^= h >> 16; - } - else - { - h ^= h >> 33; - h *= UINT64_C(0xff51afd7ed558ccd); - h ^= h >> 33; - h *= UINT64_C(0xc4ceb9fe1a85ec53); - h ^= h >> 33; - } - return h; - } - else - { - hasher h; - h.write(v); - return h.finalize(); - } + hasher h; + h.write(v); + return h.finalize(); + } + + inline size_t hash(const void* data, size_t len) noexcept + { + hasher h; + h.write(data, len); + return h.finalize(); } } + +#pragma warning(pop) diff --git a/src/til/ut_til/HashTests.cpp b/src/til/ut_til/HashTests.cpp new file mode 100644 index 0000000000..4b8ba38074 --- /dev/null +++ b/src/til/ut_til/HashTests.cpp @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +#include "precomp.h" + +#include + +using namespace WEX::Common; +using namespace WEX::Logging; +using namespace WEX::TestExecution; + +class HashTests +{ + TEST_CLASS(HashTests); + + TEST_METHOD(TestVectors) + { + struct Test + { + std::string_view input; + size_t seed; + uint64_t expected64; + uint32_t expected32; + }; + + static constexpr std::array tests{ + Test{ "", 0, 0x42bc986dc5eec4d3, 0xa45f982f }, + Test{ "a", 1, 0x84508dc903c31551, 0x09021114 }, + Test{ "abc", 2, 0x0bc54887cfc9ecb1, 0xfe40215d }, + Test{ "message digest", 3, 0x6e2ff3298208a67c, 0x6e0fb730 }, + Test{ "abcdefghijklmnopqrstuvwxyz", 4, 0x9a64e42e897195b9, 0x9435b8c2 }, + Test{ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 5, 0x9199383239c32554, 0xccf9734c }, + Test{ "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 6, 0x7c1ccf6bba30f5a5, 0x9fa5ef6e }, + }; + + for (const auto& t : tests) + { + const auto actual = til::hasher{ t.seed }.write(t.input).finalize(); +#if defined(TIL_HASH_X86) + VERIFY_ARE_EQUAL(t.expected32, actual); +#else + VERIFY_ARE_EQUAL(t.expected64, actual); +#endif + } + } +}; diff --git a/src/til/ut_til/til.unit.tests.vcxproj b/src/til/ut_til/til.unit.tests.vcxproj index 444b0e4cae..0ff3ba29c7 100644 --- a/src/til/ut_til/til.unit.tests.vcxproj +++ b/src/til/ut_til/til.unit.tests.vcxproj @@ -19,6 +19,7 @@ + @@ -35,6 +36,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/til/ut_til/til.unit.tests.vcxproj.filters b/src/til/ut_til/til.unit.tests.vcxproj.filters index 3db31b332e..5fb66a2af6 100644 --- a/src/til/ut_til/til.unit.tests.vcxproj.filters +++ b/src/til/ut_til/til.unit.tests.vcxproj.filters @@ -24,8 +24,92 @@ + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + inc + + + + + 
{7cf29ba4-d33d-4c3b-82e3-ab73e5a79685} + \ No newline at end of file