/*
 * Review notes for the patch text that follows. The patch is a git diff whose
 * line breaks have been collapsed, so hunk markers, '+' prefixes and code
 * share the same physical lines. The hunk headers carry line counts, so the
 * patch text itself is left untouched and all notes live in this preamble.
 *
 * Files touched, in order of appearance:
 *   - storm/CMakeLists.txt: adds the string subdirectory's .cpp files to the
 *     STORM_SOURCES glob.
 *   - storm/String.cpp: includes storm/string/bjhash.hpp and adds two lookup
 *     tables, GetNextTextUpper() and SStrHashHT(). The SStrCopy() lines are
 *     unchanged context.
 *   - storm/String.hpp: declares SStrHashHT().
 *   - storm/string/bjhash.cpp and bjhash.hpp: new files with the mix() macro
 *     and bjhash().
 *   - test/String.cpp: new test case for SStrHashHT().
 *
 * bytesFromUTF8[256] / offsetsFromUTF8[8]
 *   bytesFromUTF8 maps a byte value to a selector in the range 0..6.
 *   GetNextTextUpper() indexes it with the byte at *string and uses the
 *   result both as its switch selector and as the index into
 *   offsetsFromUTF8, whose entry is subtracted from the accumulated value.
 *
 * GetNextTextUpper(orig, string, upper)
 *   - Clears *orig and *upper before doing anything else.
 *   - Selector 2..6: adds the current byte to *orig and advances *string.
 *     If the next byte is NUL it returns immediately, leaving *upper at 0
 *     and *orig holding the unadjusted first byte. Otherwise it shifts
 *     *orig left by 6 and falls through into case 1.
 *   - Selector 1: adds the current byte to *orig and advances *string.
 *   - Every selector, including 0 via `default`, then subtracts
 *     offsetsFromUTF8[selector] and clamps *orig to 0xFFFF.
 *   - Upper-casing, applied to the clamped value:
 *       selector 1: 0x61..0x7A inclusive  -> value - 32
 *       selector 2: 0xE0..0xFE inclusive  -> value - 32
 *                   339                   -> 338
 *                   1105                  -> 1025
 *                   0x430..0x44F inclusive -> value - 32
 *       anything else is copied to *upper unchanged.
 *   NOTE(review): cases 6, 5, 4, 3 and 2 share one body, so at most two
 *   bytes are consumed whatever the selector is, while offsetsFromUTF8 has
 *   distinct entries for 3..6. Confirm this is intended.
 *   NOTE(review): `**string` has type const char. Where plain char is
 *   signed, bytes >= 0x80 would be sign-extended when added to *orig.
 *   Confirm the expected behaviour for non-ASCII input.
 *
 * SStrHashHT(string)
 *   - Builds a normalized copy of the input in a 0x400-byte local buffer.
 *   - Loops while the current byte is non-zero and length <= 0x3FB.
 *   - Each step takes the upper-cased value from GetNextTextUpper() and
 *     replaces '/' with '\\'.
 *   - The value is stored low byte first, one byte per loop pass, until the
 *     remaining value is zero; an upper value of 0 therefore stores nothing.
 *   - Writes a terminating 0 and returns bjhash(buffer, length, 0).
 *
 * mix(a, b, c) / bjhash(k, length, initval)
 *   - a and b start at 0x9e3779b9, c starts at initval.
 *   - While at least 12 bytes remain, three 32-bit words are assembled from
 *     the bytes (lowest-addressed byte in the low bits), added to a, b and
 *     c, and mixed.
 *   - The total length is added to c. The remaining 0..11 bytes are added
 *     through a switch whose cases all fall through; c only receives bytes
 *     in its upper three positions there.
 *   - A final mix() runs and c is returned.
 *   NOTE(review): the name and constants suggest one of Bob Jenkins' hash
 *   functions. Confirm against the upstream reference before relying on
 *   published test vectors.
 *
 * Tests
 *   - SStrHashHT("foo") is expected to equal 1371562358u.
 *   - SStrHashHT("foo/bar") is expected to equal 2270424393u.
 *   - "foo/bar" and "foo\\bar" are expected to hash identically.
 *
 * NOTE(review): the `#include` lines added to String.hpp and bjhash.hpp name
 * no header, and the SStrCopy context shows `static_cast(` with no type.
 * This looks like angle-bracket content lost in extraction. Confirm against
 * the repository before applying.
 */
diff --git a/storm/CMakeLists.txt b/storm/CMakeLists.txt index 458edce..cef64e0 100644 --- a/storm/CMakeLists.txt +++ b/storm/CMakeLists.txt @@ -1,5 +1,6 @@ file(GLOB STORM_SOURCES "*.cpp" + "string/*.cpp" "thread/*.cpp" ) diff --git a/storm/String.cpp b/storm/String.cpp index 013c08e..31975ea 100644 --- a/storm/String.cpp +++ b/storm/String.cpp @@ -1,5 +1,132 @@ #include "storm/String.hpp" #include "storm/Error.hpp" +#include "storm/string/bjhash.hpp" + +uint8_t bytesFromUTF8[256] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6 +}; + +uint32_t offsetsFromUTF8[8] = { + 0, + 0, + 0x3080, + 0x0E2080, + 0x3C82080, + 0x0FA082080, + 0x82082080, + 0 +}; + +void GetNextTextUpper(uint32_t* orig, const char** string, uint32_t* upper) { + uint8_t byte = **string; + int32_t v3 = bytesFromUTF8[byte]; + + *orig = 0; + *upper = 0; + + switch (v3) { + case 6: + case 5: + case 4: + case 3: + case 2: + *orig += **string; + (*string)++; + + if (!**string) { + return; + } + + *orig <<= 6; + + case 1: + *orig += **string; + (*string)++; + + default: + *orig -= offsetsFromUTF8[v3]; + + if (*orig > 0xFFFF) { + *orig = 0xFFFF; + } + + uint32_t v4 = *orig; + + bool v5, v6; + + if (v3 == 1) { + if (v4 < 0x61) { + *upper = v4; + return; + } + + v5 = 
v4 < 0x7A; + v6 = v4 == 122; + + if (!v5 && !v6) { + *upper = v4; + return; + } + + v4 -= 32; + *upper = v4; + return; + } + + if (v3 != 2) { + *upper = v4; + return; + } + + if (v4 >= 0xE0 && v4 <= 0xFE) { + v4 -= 32; + *upper = v4; + return; + } + + if (v4 == 339) { + *upper = 338; + return; + } + + if (v4 == 1105) { + *upper = 1025; + return; + } + + if (v4 >= 0x430) { + v5 = v4 < 0x44F; + v6 = v4 == 1103; + + if (!v5 && !v6) { + *upper = v4; + return; + } + + v4 -= 32; + *upper = v4; + return; + } + + *upper = v4; + return; + } +} size_t SStrCopy(char* dest, const char* source, size_t destsize) { STORM_ASSERT(dest); @@ -33,3 +160,34 @@ size_t SStrCopy(char* dest, const char* source, size_t destsize) { *destbuf = '\0'; return static_cast(destbuf - dest); } + +uint32_t SStrHashHT(const char* string) { + char normalized[0x400]; + char* buf = normalized; + + uint32_t length = 0; + + if (*string) { + uint32_t value, orig, upper; + + while (*string && length <= 0x3FB) { + // Convert each character to uppercase + GetNextTextUpper(&orig, &string, &upper); + + // Replace forward slash with back slash + value = upper == '/' ? 
'\\' : upper; + + while (value) { + *buf = value; + value >>= 8; + length++; + buf++; + } + } + } + + // Ensure string is terminated + *buf = 0; + + return bjhash((uint8_t*)&normalized, length, 0); +} diff --git a/storm/String.hpp b/storm/String.hpp index ad90d8b..4845d4f 100644 --- a/storm/String.hpp +++ b/storm/String.hpp @@ -1,8 +1,11 @@ #ifndef STORM_STRING_HPP #define STORM_STRING_HPP +#include #include size_t SStrCopy(char* dest, const char* source, size_t destsize); +uint32_t SStrHashHT(const char* string); + #endif diff --git a/storm/string/bjhash.cpp b/storm/string/bjhash.cpp new file mode 100644 index 0000000..6bce124 --- /dev/null +++ b/storm/string/bjhash.cpp @@ -0,0 +1,87 @@ +#include "storm/string/bjhash.hpp" + +#define mix(a, b, c) \ + { \ + a -= b; \ + a -= c; \ + a ^= (c >> 13); \ + b -= c; \ + b -= a; \ + b ^= (a << 8); \ + c -= a; \ + c -= b; \ + c ^= (b >> 13); \ + a -= b; \ + a -= c; \ + a ^= (c >> 12); \ + b -= c; \ + b -= a; \ + b ^= (a << 16); \ + c -= a; \ + c -= b; \ + c ^= (b >> 5); \ + a -= b; \ + a -= c; \ + a ^= (c >> 3); \ + b -= c; \ + b -= a; \ + b ^= (a << 10); \ + c -= a; \ + c -= b; \ + c ^= (b >> 15); \ + } + +uint32_t bjhash(uint8_t* k, uint32_t length, uint32_t initval) { + uint32_t a, b, c, len; + + len = length; // Set up the internal state + a = b = 0x9e3779b9; // The golden ratio; an arbitrary value + c = initval; // The previous hash value + + // Handle most of the key + while (len >= 12) { + a += (k[0] + ((uint32_t)k[1] << 8) + ((uint32_t)k[2] << 16) + ((uint32_t)k[3] << 24)); + b += (k[4] + ((uint32_t)k[5] << 8) + ((uint32_t)k[6] << 16) + ((uint32_t)k[7] << 24)); + c += (k[8] + ((uint32_t)k[9] << 8) + ((uint32_t)k[10] << 16) + ((uint32_t)k[11] << 24)); + + mix(a, b, c); + + k += 12; + len -= 12; + } + + // Handle the last 11 bytes + c += length; + switch (len) { + // All the case statements fall through + case 11: + c += ((uint32_t)k[10] << 24); + case 10: + c += ((uint32_t)k[9] << 16); + case 9: + c += ((uint32_t)k[8] 
<< 8); + case 8: + // The first byte of c is reserved for the length + b += ((uint32_t)k[7] << 24); + case 7: + b += ((uint32_t)k[6] << 16); + case 6: + b += ((uint32_t)k[5] << 8); + case 5: + b += k[4]; + case 4: + a += ((uint32_t)k[3] << 24); + case 3: + a += ((uint32_t)k[2] << 16); + case 2: + a += ((uint32_t)k[1] << 8); + case 1: + a += k[0]; + case 0:; + // Nothing left to add + } + mix(a, b, c); + + // Report the result + return c; +} diff --git a/storm/string/bjhash.hpp b/storm/string/bjhash.hpp new file mode 100644 index 0000000..cc3f5ef --- /dev/null +++ b/storm/string/bjhash.hpp @@ -0,0 +1,8 @@ +#ifndef STORM_STRING_BJ_HASH_HPP +#define STORM_STRING_BJ_HASH_HPP + +#include + +uint32_t bjhash(uint8_t* k, uint32_t length, uint32_t initval); + +#endif diff --git a/test/String.cpp b/test/String.cpp new file mode 100644 index 0000000..634eece --- /dev/null +++ b/test/String.cpp @@ -0,0 +1,20 @@ +#include "storm/String.hpp" +#include "test/Test.hpp" + +TEST_CASE("SStrHashHT", "[string]") { + SECTION("hashes simple string correctly") { + auto hash = SStrHashHT("foo"); + REQUIRE(hash == 1371562358u); + } + + SECTION("hashes string with forward slash correctly") { + auto hash = SStrHashHT("foo/bar"); + REQUIRE(hash == 2270424393u); + } + + SECTION("hashes string with forward slash equivalent to back slash") { + auto hashForwardSlash = SStrHashHT("foo/bar"); + auto hashBackSlash = SStrHashHT("foo\\bar"); + REQUIRE(hashForwardSlash == hashBackSlash); + } +}