feat(string): add SStrHashHT

This commit is contained in:
fallenoak 2020-11-14 17:18:49 -06:00
parent f631bdac3b
commit abb9e8ed79
No known key found for this signature in database
GPG key ID: 7628F8E61AEA070D
6 changed files with 277 additions and 0 deletions

View file

@ -1,5 +1,6 @@
file(GLOB STORM_SOURCES file(GLOB STORM_SOURCES
"*.cpp" "*.cpp"
"string/*.cpp"
"thread/*.cpp" "thread/*.cpp"
) )

View file

@ -1,5 +1,132 @@
#include "storm/String.hpp" #include "storm/String.hpp"
#include "storm/Error.hpp" #include "storm/Error.hpp"
#include "storm/string/bjhash.hpp"
// Decode-path selector indexed by the first byte of a character.
//   0x00        -> 0 (string terminator: nothing is consumed)
//   0x01 - 0xBF -> 1 (single byte; note this includes 0x80 - 0xBF)
//   0xC0 - 0xDF -> 2
//   0xE0 - 0xEF -> 3
//   0xF0 - 0xF7 -> 4
//   0xF8 - 0xFB -> 5
//   0xFC - 0xFF -> 6
// The value is also used as the index into offsetsFromUTF8.
uint8_t bytesFromUTF8[256] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6
};

// Value subtracted from the accumulated bytes, indexed by the bytesFromUTF8
// result. For the two byte path this removes the marker bits of both bytes:
// 0x3080 == (0xC0 << 6) + 0x80.
uint32_t offsetsFromUTF8[8] = {
    0,
    0,
    0x3080,
    0x0E2080,
    0x3C82080,
    0x0FA082080,
    0x82082080,
    0
};

// Reads the next character from *string, advances *string past the bytes it
// consumed, and reports both the decoded value and its uppercase form.
//
// orig   - receives the decoded value, clamped to 0xFFFF.
// string - in/out cursor into a NUL-terminated string; not advanced when it
//          already points at the terminator.
// upper  - receives the uppercase form of *orig. Case mapping covers a-z,
//          0xE0-0xFE, 0x153 (-> 0x152), 0x451 (-> 0x401) and 0x430-0x44F;
//          every other value is passed through unchanged.
//
// If the string ends directly after a multi-byte lead byte, the function
// returns early with *upper == 0 and *orig holding the raw lead byte.
//
// NOTE(review): lead bytes that select the 3-6 byte paths still consume only
// two bytes and always end up clamped to 0xFFFF. That matches the previous
// behavior and is left untouched here - confirm whether it is intended.
void GetNextTextUpper(uint32_t* orig, const char** string, uint32_t* upper) {
    // Bytes must be accumulated as unsigned values. Plain char is signed on
    // many platforms, and a sign-extended byte >= 0x80 pushes the sum past
    // 0xFFFF, which would make every non-ASCII mapping below unreachable.
    const uint8_t lead = static_cast<uint8_t>(**string);
    const int32_t count = bytesFromUTF8[lead];

    *orig = 0;
    *upper = 0;

    if (count >= 2) {
        *orig += lead;
        (*string)++;

        // Truncated sequence: stop at the terminator without consuming it
        if (!**string) {
            return;
        }

        *orig <<= 6;
    }

    if (count >= 1) {
        *orig += static_cast<uint8_t>(**string);
        (*string)++;
    }

    *orig -= offsetsFromUTF8[count];

    if (*orig > 0xFFFF) {
        *orig = 0xFFFF;
    }

    uint32_t value = *orig;

    if (count == 1) {
        // ASCII a-z
        if (value >= 0x61 && value <= 0x7A) {
            value -= 32;
        }
    } else if (count == 2) {
        if (value >= 0xE0 && value <= 0xFE) {
            // Latin-1 lowercase range
            value -= 32;
        } else if (value == 339) {
            value = 338;
        } else if (value == 1105) {
            value = 1025;
        } else if (value >= 0x430 && value <= 0x44F) {
            // Cyrillic lowercase range
            value -= 32;
        }
    }

    *upper = value;
}
size_t SStrCopy(char* dest, const char* source, size_t destsize) { size_t SStrCopy(char* dest, const char* source, size_t destsize) {
STORM_ASSERT(dest); STORM_ASSERT(dest);
@ -33,3 +160,34 @@ size_t SStrCopy(char* dest, const char* source, size_t destsize) {
*destbuf = '\0'; *destbuf = '\0';
return static_cast<size_t>(destbuf - dest); return static_cast<size_t>(destbuf - dest);
} }
// Hashes a NUL-terminated string after normalizing it: every character is
// replaced by the uppercase value reported by GetNextTextUpper, and forward
// slashes become back slashes. The normalized bytes are hashed with bjhash
// using an initial value of 0.
//
// Input is no longer consumed once the normalized length exceeds 0x3FB, so
// only that leading portion of a longer string contributes to the hash.
uint32_t SStrHashHT(const char* string) {
    char normalized[0x400];
    uint32_t length = 0;

    while (*string && length <= 0x3FB) {
        uint32_t orig;
        uint32_t upper;

        // Decode the next character and fetch its uppercase form
        GetNextTextUpper(&orig, &string, &upper);

        // Treat both slash styles as the same separator
        uint32_t value = (upper == '/') ? '\\' : upper;

        // Emit the value one byte at a time, lowest byte first
        for (; value != 0; value >>= 8) {
            normalized[length++] = value;
        }
    }

    // Ensure the normalized string is terminated
    normalized[length] = 0;

    return bjhash(reinterpret_cast<uint8_t*>(normalized), length, 0);
}

View file

@ -1,8 +1,11 @@
#ifndef STORM_STRING_HPP #ifndef STORM_STRING_HPP
#define STORM_STRING_HPP #define STORM_STRING_HPP
#include <cstdint>
#include <cstdlib> #include <cstdlib>
size_t SStrCopy(char* dest, const char* source, size_t destsize); size_t SStrCopy(char* dest, const char* source, size_t destsize);
// Returns a 32-bit hash of the NUL-terminated string. The string is
// normalized first (uppercased, '/' replaced with '\\'), so hashes are
// case-insensitive for the characters the implementation can uppercase and
// identical for both slash styles.
uint32_t SStrHashHT(const char* string);
#endif #endif

87
storm/string/bjhash.cpp Normal file
View file

@ -0,0 +1,87 @@
#include "storm/string/bjhash.hpp"
// Scrambles the three 32-bit state words in place. Nine rounds, each made of
// two subtractions followed by an xor with a shifted neighbour.
static inline void bjmix(uint32_t& a, uint32_t& b, uint32_t& c) {
    a -= b; a -= c; a ^= (c >> 13);
    b -= c; b -= a; b ^= (a << 8);
    c -= a; c -= b; c ^= (b >> 13);

    a -= b; a -= c; a ^= (c >> 12);
    b -= c; b -= a; b ^= (a << 16);
    c -= a; c -= b; c ^= (b >> 5);

    a -= b; a -= c; a ^= (c >> 3);
    b -= c; b -= a; b ^= (a << 10);
    c -= a; c -= b; c ^= (b >> 15);
}

// Combines four consecutive key bytes into one word, lowest byte first.
static inline uint32_t bjload32(const uint8_t* p) {
    return p[0]
        + (static_cast<uint32_t>(p[1]) << 8)
        + (static_cast<uint32_t>(p[2]) << 16)
        + (static_cast<uint32_t>(p[3]) << 24);
}

// Hashes `length` bytes starting at `k` into a 32-bit value.
//
// k       - the key bytes
// length  - number of bytes to hash
// initval - seeds the internal state (for example a previous hash value)
uint32_t bjhash(uint8_t* k, uint32_t length, uint32_t initval) {
    // Internal state: a and b start at the golden ratio constant (an
    // arbitrary value), c starts at the caller supplied seed
    uint32_t a = 0x9e3779b9;
    uint32_t b = 0x9e3779b9;
    uint32_t c = initval;

    // Consume the key in 12 byte chunks
    const uint8_t* cursor = k;
    uint32_t remaining = length;

    for (; remaining >= 12; remaining -= 12, cursor += 12) {
        a += bjload32(cursor);
        b += bjload32(cursor + 4);
        c += bjload32(cursor + 8);
        bjmix(a, b, c);
    }

    // The lowest byte of c is reserved for the total length
    c += length;

    // Fold in the trailing 0-11 bytes: bytes 0-3 feed a, bytes 4-7 feed b,
    // and bytes 8-10 feed the upper three bytes of c
    for (uint32_t i = 0; i < remaining; ++i) {
        const uint32_t byte = cursor[i];

        if (i < 4) {
            a += byte << (8 * i);
        } else if (i < 8) {
            b += byte << (8 * (i - 4));
        } else {
            c += byte << (8 * (i - 7));
        }
    }

    bjmix(a, b, c);

    // Report the result
    return c;
}

8
storm/string/bjhash.hpp Normal file
View file

@ -0,0 +1,8 @@
#ifndef STORM_STRING_BJ_HASH_HPP
#define STORM_STRING_BJ_HASH_HPP
#include <cstdint>
// Returns a 32-bit hash of the `length` bytes starting at `k`. `initval`
// seeds the hash state; pass a previous hash value to chain, or any
// arbitrary value otherwise.
uint32_t bjhash(uint8_t* k, uint32_t length, uint32_t initval);
#endif

20
test/String.cpp Normal file
View file

@ -0,0 +1,20 @@
#include "storm/String.hpp"
#include "test/Test.hpp"
// Pins SStrHashHT against known hash values and checks that both slash
// styles normalize to the same hash.
TEST_CASE("SStrHashHT", "[string]") {
    SECTION("hashes simple string correctly") {
        REQUIRE(SStrHashHT("foo") == 1371562358u);
    }

    SECTION("hashes string with forward slash correctly") {
        REQUIRE(SStrHashHT("foo/bar") == 2270424393u);
    }

    SECTION("hashes string with forward slash equivalent to back slash") {
        REQUIRE(SStrHashHT("foo/bar") == SStrHashHT("foo\\bar"));
    }
}