/**
 * Encodes a single code point as a null-terminated UTF-8 sequence.
 *
 * Values below 0x80000000 are written using the 1- to 6-byte scheme (the
 * 5- and 6-byte forms are the legacy extension for values above 0x1FFFFF),
 * followed by a '\0' terminator. Values of 0x80000000 and above cannot be
 * represented in 31 bits; for those only a single '\0' is written.
 *
 * @param c       Code point to encode.
 * @param strptr  Destination buffer. Nothing is written when this is null.
 *                The caller must provide room for up to 7 bytes (6 encoded
 *                bytes plus the terminator).
 */
void SUniSPutUTF8(uint32_t c, char* strptr) {
    if (!strptr) {
        return;
    }

    // Not representable in 31 bits: produce an empty string.
    if (c >= 0x80000000) {
        *strptr = '\0';
        return;
    }

    // Select the number of continuation bytes and the lead-byte marker.
    // Each continuation byte carries 6 payload bits.
    int continuationCount;
    uint32_t leadMarker;

    if (c < 0x80) {
        continuationCount = 0;
        leadMarker = 0x00;
    } else if (c < 0x800) {
        continuationCount = 1;
        leadMarker = 0xC0;
    } else if (c < 0x10000) {
        continuationCount = 2;
        leadMarker = 0xE0;
    } else if (c < 0x200000) {
        continuationCount = 3;
        leadMarker = 0xF0;
    } else if (c < 0x4000000) {
        // 5-byte form holds 26 payload bits, so it covers up to 0x3FFFFFF.
        continuationCount = 4;
        leadMarker = 0xF8;
    } else {
        continuationCount = 5;
        leadMarker = 0xFC;
    }

    char* out = strptr;

    // Lead byte: marker bits plus the most significant payload bits. For the
    // 5-byte form this is (c >> 24) and for the 6-byte form (c >> 30).
    *out++ = static_cast<char>((c >> (6 * continuationCount)) | leadMarker);

    // Continuation bytes, most significant group first: 10xxxxxx. In the
    // 6-byte form the first of these takes bits 24-29, i.e. (c >> 24).
    for (int shift = 6 * (continuationCount - 1); shift >= 0; shift -= 6) {
        *out++ = static_cast<char>(((c >> shift) & 0x3F) | 0x80);
    }

    *out = '\0';
}
"test/Test.hpp" TEST_CASE("SUniSGetUTF8", "[unicode]") { @@ -30,3 +31,31 @@ TEST_CASE("SUniSGetUTF8", "[unicode]") { REQUIRE(chars == 0); } } + +TEST_CASE("SUniSPutUTF8", "[unicode]") { + SECTION("writes ascii-range utf-8 first character") { + auto code = 'f'; + char buffer[100] = { 0 }; + SUniSPutUTF8(code, buffer); + + REQUIRE(SStrLen(buffer) == 1); + REQUIRE(!SStrCmp(buffer, "f", SStrLen(buffer))); + } + + SECTION("writes non-ascii-range utf-8 first character") { + auto code = 0x1F642; + char buffer[100] = { 0 }; + SUniSPutUTF8(code, buffer); + + REQUIRE(SStrLen(buffer) == 4); + REQUIRE(!SStrCmp(buffer, "\xF0\x9F\x99\x82", SStrLen(buffer))); + } + + SECTION("writes null first character") { + auto code = '\0'; + char buffer[100] = { 0 }; + SUniSPutUTF8(code, buffer); + + REQUIRE(SStrLen(buffer) == 0); + } +}