diff --git a/CMakeLists.txt b/CMakeLists.txt index d1ae5ab3..5f2cf3ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -704,6 +704,7 @@ set(WOWEE_SOURCES src/pipeline/wowee_spell_variants.cpp src/pipeline/wowee_voiceovers.cpp src/pipeline/wowee_trade_rules.cpp + src/pipeline/wowee_word_filters.cpp src/pipeline/custom_zone_discovery.cpp src/pipeline/dbc_layout.cpp @@ -1571,6 +1572,7 @@ add_executable(wowee_editor tools/editor/cli_spell_variants_catalog.cpp tools/editor/cli_voiceovers_catalog.cpp tools/editor/cli_trade_rules_catalog.cpp + tools/editor/cli_word_filters_catalog.cpp tools/editor/cli_catalog_pluck.cpp tools/editor/cli_catalog_find.cpp tools/editor/cli_catalog_by_name.cpp @@ -1757,6 +1759,7 @@ add_executable(wowee_editor src/pipeline/wowee_spell_variants.cpp src/pipeline/wowee_voiceovers.cpp src/pipeline/wowee_trade_rules.cpp + src/pipeline/wowee_word_filters.cpp src/pipeline/custom_zone_discovery.cpp src/pipeline/terrain_mesh.cpp diff --git a/include/pipeline/wowee_word_filters.hpp b/include/pipeline/wowee_word_filters.hpp new file mode 100644 index 00000000..8a7e4f0a --- /dev/null +++ b/include/pipeline/wowee_word_filters.hpp @@ -0,0 +1,131 @@ +#pragma once + +#include +#include +#include + +namespace wowee { +namespace pipeline { + +// Wowee Open Word Filter catalog (.wwfl) — novel +// replacement for the implicit chat-moderation patterns +// vanilla WoW carried in the bad-word checker (the +// hardcoded substring list the CMSG_MESSAGECHAT +// handler walked before broadcasting). Each entry +// defines one pattern the chat preprocessor matches +// against outbound messages, the replacement to apply +// (or "drop" / "warn" / "mute" the sender), and the +// filter kind for analytics. +// +// This catalog is intentionally non-profanity focused: +// the ecosystem distributes through CI / public PRs +// where embedded profanity would create reviewer- +// experience and licensing concerns. 
#include <cstdint>
#include <cstring>
#include <fstream>
#include <string>
#include <utility>
#include <vector>

namespace wowee {
namespace pipeline {

// Wowee Open Word Filter catalog (.wwfl).
//
// The included presets target SPAM, RMT (real-money-transfer
// solicitations), URL leakage, and all-caps abuse — the moderation
// surfaces server admins actually need. Profanity-list integration is
// left to deployment-time configuration where local laws and community
// standards apply.
//
// Cross-references with previously-added formats:
//   WCHN: filters apply per-channel; the chat preprocessor checks the
//         channel kind from WCHN to decide which filter rules apply.
//
// Binary layout (fields are written raw in host byte order, i.e.
// little-endian on the supported targets):
//   magic[4]            = "WWFL"
//   version (uint32)    = current 1
//   nameLen + name      (catalog label)
//   entryCount (uint32)
//   entries (each):
//     filterId (uint32)
//     nameLen + name
//     descLen + description
//     patLen + pattern
//     replLen + replacement
//     filterKind (uint8)     — Spam / GoldSeller / AllCaps / RepeatChar
//                              / URL / AdvertReward / Misc
//     severity (uint8)       — Warn / Replace / Drop / Mute
//     caseSensitive (uint8)  — 0/1 bool
//     pad0 (uint8)
//     iconColorRGBA (uint32)
// Every length prefix is a uint32 byte count; strings longer than 1 MiB
// and catalogs with more than 2^20 entries are rejected on both save
// and load.
struct WoweeWordFilters {
    enum FilterKind : uint8_t {
        Spam         = 0,    // generic noise patterns
        GoldSeller   = 1,    // RMT solicitations
        AllCaps      = 2,    // shouting detection
        RepeatChar   = 3,    // spam-mash detection (e.g. "aaaaaaaaaa")
        URL          = 4,    // URL leakage
        AdvertReward = 5,    // "FREE GOLD" / contest adverts
        Misc         = 255,
    };

    enum Severity : uint8_t {
        Warn    = 0,   // log + warn the sender; let message through
        Replace = 1,   // substitute the matched portion and forward
        Drop    = 2,   // silently discard the message
        Mute    = 3,   // drop AND mute the sender for a configured
                       // duration
    };

    struct Entry {
        uint32_t filterId = 0;
        std::string name;
        std::string description;
        std::string pattern;       // substring to match
        std::string replacement;   // for Replace
        uint8_t filterKind = Spam;
        uint8_t severity = Warn;
        uint8_t caseSensitive = 0;
        uint8_t pad0 = 0;
        uint32_t iconColorRGBA = 0xFFFFFFFFu;
    };

    std::string name;
    std::vector<Entry> entries;

    // A catalog counts as valid as soon as it holds at least one entry.
    bool isValid() const { return !entries.empty(); }

    // Returns the first entry whose filterId matches, or nullptr.
    // The pointer is invalidated by any mutation of `entries`.
    const Entry* findById(uint32_t filterId) const;

    // Returns all filters of one kind — used by the chat preprocessor to
    // dispatch per-kind handlers (URL kind hits the link expander,
    // AllCaps kind hits the shout-suppressor, etc.). Pointers refer into
    // `entries` and share its lifetime.
    std::vector<const Entry*> findByKind(uint8_t filterKind) const;
};

class WoweeWordFiltersLoader {
public:
    // `basePath` may be given with or without the ".wwfl" suffix.
    // save() returns false when the file cannot be opened or written, or
    // when the catalog exceeds the limits load() would accept.
    static bool save(const WoweeWordFilters& cat,
                     const std::string& basePath);
    // load() returns an empty catalog (isValid() == false, empty name) on
    // any failure: missing file, bad magic/version, truncation, or an
    // out-of-range length/count field.
    static WoweeWordFilters load(const std::string& basePath);
    static bool exists(const std::string& basePath);

    // Preset emitters used by --gen-wfl* variants.
    //
    //   makeSpamRMT   — 5 RMT / spam patterns ("wts gold", "wtb gold",
    //                   typo-substituted "g0ld", "1000g for",
    //                   "free gold").
    //   makeAllCaps   — 3 shout-style patterns (the all-caps word
    //                   "ANYBODY", "!!!", "$$$").
    //   makeURLDetect — 3 URL leakage patterns ("http://", "https://",
    //                   "www.").
    static WoweeWordFilters makeSpamRMT(const std::string& catalogName);
    static WoweeWordFilters makeAllCaps(const std::string& catalogName);
    static WoweeWordFilters makeURLDetect(const std::string& catalogName);
};

namespace {

constexpr char kMagic[4] = {'W', 'W', 'F', 'L'};
constexpr uint32_t kVersion = 1;
// Upper bounds shared by the writer and the reader so that save() can
// never produce a file load() refuses.
constexpr uint32_t kMaxStringBytes = 1u << 20;
constexpr uint32_t kMaxEntries = 1u << 20;

// Writes the raw object representation of a trivially-copyable value.
template <typename T>
void writePOD(std::ofstream& os, const T& v) {
    os.write(reinterpret_cast<const char*>(&v), sizeof(T));
}

// Reads sizeof(T) raw bytes into `v`; false on a short read.
template <typename T>
bool readPOD(std::ifstream& is, T& v) {
    is.read(reinterpret_cast<char*>(&v), sizeof(T));
    return is.gcount() == static_cast<std::streamsize>(sizeof(T));
}

// Writes a uint32 length prefix followed by the bytes. A string over the
// reader's limit puts the stream into a failed state instead of being
// written, so save() reports the problem through os.good().
void writeStr(std::ofstream& os, const std::string& s) {
    if (s.size() > kMaxStringBytes) {
        os.setstate(std::ios::failbit);
        return;
    }
    const uint32_t n = static_cast<uint32_t>(s.size());
    writePOD(os, n);
    if (n > 0) os.write(s.data(), n);
}

// Reads a length-prefixed string; false (and `s` cleared) on a short
// read or when the declared length exceeds kMaxStringBytes.
bool readStr(std::ifstream& is, std::string& s) {
    uint32_t n = 0;
    if (!readPOD(is, n)) return false;
    if (n > kMaxStringBytes) return false;
    s.resize(n);
    if (n > 0) {
        is.read(&s[0], n);
        if (is.gcount() != static_cast<std::streamsize>(n)) {
            s.clear();
            return false;
        }
    }
    return true;
}

// Appends ".wwfl" unless the path already ends with it.
std::string normalizePath(std::string base) {
    static const char kExt[] = ".wwfl";
    const std::size_t extLen = sizeof(kExt) - 1;
    if (base.size() < extLen ||
        base.compare(base.size() - extLen, extLen, kExt) != 0) {
        base += kExt;
    }
    return base;
}

// Packs four channels into one uint32 with R in the lowest byte and A in
// the highest.
uint32_t packRgba(uint8_t r, uint8_t g, uint8_t b, uint8_t a = 0xFF) {
    return (static_cast<uint32_t>(a) << 24) |
           (static_cast<uint32_t>(b) << 16) |
           (static_cast<uint32_t>(g) << 8) |
            static_cast<uint32_t>(r);
}

// Reads one serialized entry in the field order documented on
// WoweeWordFilters; false as soon as any field is short or invalid.
bool readEntry(std::ifstream& is, WoweeWordFilters::Entry& e) {
    return readPOD(is, e.filterId) &&
           readStr(is, e.name) &&
           readStr(is, e.description) &&
           readStr(is, e.pattern) &&
           readStr(is, e.replacement) &&
           readPOD(is, e.filterKind) &&
           readPOD(is, e.severity) &&
           readPOD(is, e.caseSensitive) &&
           readPOD(is, e.pad0) &&
           readPOD(is, e.iconColorRGBA);
}

} // namespace

const WoweeWordFilters::Entry*
WoweeWordFilters::findById(uint32_t filterId) const {
    for (const auto& e : entries)
        if (e.filterId == filterId) return &e;
    return nullptr;
}

std::vector<const WoweeWordFilters::Entry*>
WoweeWordFilters::findByKind(uint8_t filterKind) const {
    std::vector<const Entry*> out;
    for (const auto& e : entries)
        if (e.filterKind == filterKind) out.push_back(&e);
    return out;
}

bool WoweeWordFiltersLoader::save(const WoweeWordFilters& cat,
                                  const std::string& basePath) {
    // Refuse up front what load() would reject, before touching the disk.
    if (cat.entries.size() > kMaxEntries) return false;
    std::ofstream os(normalizePath(basePath), std::ios::binary);
    if (!os) return false;
    os.write(kMagic, sizeof(kMagic));
    writePOD(os, kVersion);
    writeStr(os, cat.name);
    const uint32_t entryCount = static_cast<uint32_t>(cat.entries.size());
    writePOD(os, entryCount);
    for (const auto& e : cat.entries) {
        writePOD(os, e.filterId);
        writeStr(os, e.name);
        writeStr(os, e.description);
        writeStr(os, e.pattern);
        writeStr(os, e.replacement);
        writePOD(os, e.filterKind);
        writePOD(os, e.severity);
        writePOD(os, e.caseSensitive);
        writePOD(os, e.pad0);
        writePOD(os, e.iconColorRGBA);
    }
    // Flush so buffered write errors are visible in the result.
    os.flush();
    return os.good();
}

WoweeWordFilters WoweeWordFiltersLoader::load(
        const std::string& basePath) {
    WoweeWordFilters out;
    std::ifstream is(normalizePath(basePath), std::ios::binary);
    if (!is) return out;

    // Zero-initialized and length-checked: a file shorter than the magic
    // must not be compared against indeterminate bytes.
    char magic[sizeof(kMagic)] = {};
    is.read(magic, sizeof(magic));
    if (is.gcount() != static_cast<std::streamsize>(sizeof(magic)) ||
        std::memcmp(magic, kMagic, sizeof(kMagic)) != 0) {
        return out;
    }

    uint32_t version = 0;
    if (!readPOD(is, version) || version != kVersion) return out;

    // Parse into locals and publish only on full success, so a failure
    // never leaves a half-populated catalog (e.g. name without entries).
    std::string catalogName;
    if (!readStr(is, catalogName)) return out;
    uint32_t entryCount = 0;
    if (!readPOD(is, entryCount)) return out;
    if (entryCount > kMaxEntries) return out;

    // Grow as entries actually arrive instead of trusting the header
    // count for one large allocation.
    std::vector<WoweeWordFilters::Entry> parsed;
    parsed.reserve(entryCount < 1024u ? entryCount : 1024u);
    for (uint32_t k = 0; k < entryCount; ++k) {
        WoweeWordFilters::Entry e;
        if (!readEntry(is, e)) return out;
        parsed.push_back(std::move(e));
    }

    out.name = std::move(catalogName);
    out.entries = std::move(parsed);
    return out;
}

bool WoweeWordFiltersLoader::exists(const std::string& basePath) {
    std::ifstream is(normalizePath(basePath), std::ios::binary);
    return is.good();
}

} // namespace pipeline
} // namespace wowee
basePath) { + std::ifstream is(normalizePath(basePath), std::ios::binary); + return is.good(); +} + +WoweeWordFilters WoweeWordFiltersLoader::makeSpamRMT( + const std::string& catalogName) { + using F = WoweeWordFilters; + WoweeWordFilters c; + c.name = catalogName; + auto add = [&](uint32_t id, const char* name, + const char* pattern, const char* repl, + uint8_t severity, uint8_t caseSens, + const char* desc) { + F::Entry e; + e.filterId = id; e.name = name; e.description = desc; + e.pattern = pattern; + e.replacement = repl; + e.filterKind = F::GoldSeller; + e.severity = severity; + e.caseSensitive = caseSens; + e.iconColorRGBA = packRgba(220, 200, 80); // RMT yellow + c.entries.push_back(e); + }; + // RMT-pattern detection. All examples are PG — + // generic gold-seller phrases without profanity. + add(1, "WtsGold", + "wts gold", "***", + F::Drop, 0, + "'wts gold' (Want To Sell) RMT solicitation. " + "Drop the message; warn server moderators."); + add(2, "WtbGold", + "wtb gold", "***", + F::Drop, 0, + "'wtb gold' (Want To Buy) RMT solicitation."); + add(3, "GoldTypoSubstitution", + "g0ld", "gold", + F::Replace, 0, + "Common typo-substitution to bypass exact-string " + "filters: 'g0ld' (zero instead of o). Replace " + "with 'gold' so the message gets normalized then " + "re-checked by other filters."); + add(4, "BulkGoldOffer", + "1000g for", "***", + F::Drop, 0, + "Common gold-seller offer pattern: '1000g for " + "$X' or '1000g for cheap'. Match the prefix."); + add(5, "FreeGold", + "free gold", "***", + F::Mute, 0, + "'free gold' adverts — almost always RMT or " + "phishing. 
Mute sender for 60s + drop message."); + return c; +} + +WoweeWordFilters WoweeWordFiltersLoader::makeAllCaps( + const std::string& catalogName) { + using F = WoweeWordFilters; + WoweeWordFilters c; + c.name = catalogName; + auto add = [&](uint32_t id, const char* name, + const char* pattern, const char* repl, + uint8_t severity, uint8_t caseSens, + const char* desc) { + F::Entry e; + e.filterId = id; e.name = name; e.description = desc; + e.pattern = pattern; + e.replacement = repl; + e.filterKind = F::AllCaps; + e.severity = severity; + e.caseSensitive = caseSens; + e.iconColorRGBA = packRgba(220, 80, 100); // shout red + c.entries.push_back(e); + }; + add(100, "AllCapsWord", + "ANYBODY", + "anybody", + F::Replace, 1, + "Single common all-caps word — replace with " + "lowercase. Case-sensitive match (caseSens=1) so " + "'Anybody' isn't affected."); + add(101, "AllCapsExclamation", + "!!!", + "!", + F::Replace, 0, + "Triple-exclamation overuse. Collapse to single " + "'!' so emphasis stays but spam-style " + "punctuation is normalized."); + add(102, "DollarSpam", + "$$$", + "***", + F::Replace, 0, + "Money-emphasis spam ('$$$ FOR YOU!!!' style). " + "Replace with redaction marks."); + return c; +} + +WoweeWordFilters WoweeWordFiltersLoader::makeURLDetect( + const std::string& catalogName) { + using F = WoweeWordFilters; + WoweeWordFilters c; + c.name = catalogName; + auto add = [&](uint32_t id, const char* name, + const char* pattern, const char* repl, + uint8_t severity, uint8_t caseSens, + const char* desc) { + F::Entry e; + e.filterId = id; e.name = name; e.description = desc; + e.pattern = pattern; + e.replacement = repl; + e.filterKind = F::URL; + e.severity = severity; + e.caseSensitive = caseSens; + e.iconColorRGBA = packRgba(140, 200, 255); // URL blue + c.entries.push_back(e); + }; + add(200, "HttpUrl", + "http://", "[link]", + F::Replace, 0, + "HTTP URL — replace with [link] placeholder. 
" + "Server admins can decide per-channel whether " + "to permit links via WCHN config."); + add(201, "HttpsUrl", + "https://", "[link]", + F::Replace, 0, + "HTTPS URL — same handling as HTTP."); + add(202, "WwwShortUrl", + "www.", "[link]", + F::Replace, 0, + "Bare www.example URL — common shortening when " + "the http:// prefix is omitted. Catch-all."); + return c; +} + +} // namespace pipeline +} // namespace wowee diff --git a/tools/editor/cli_arg_required.cpp b/tools/editor/cli_arg_required.cpp index c2f60290..c4db24ac 100644 --- a/tools/editor/cli_arg_required.cpp +++ b/tools/editor/cli_arg_required.cpp @@ -356,6 +356,8 @@ const char* const kArgRequired[] = { "--gen-trd", "--gen-trd-admin", "--gen-trd-rmt", "--info-wtrd", "--validate-wtrd", "--export-wtrd-json", "--import-wtrd-json", + "--gen-wfl", "--gen-wfl-caps", "--gen-wfl-url", + "--info-wwfl", "--validate-wwfl", "--gen-weather-temperate", "--gen-weather-arctic", "--gen-weather-desert", "--gen-weather-stormy", "--gen-zone-atmosphere", diff --git a/tools/editor/cli_dispatch.cpp b/tools/editor/cli_dispatch.cpp index 7df54e9e..472e2bdc 100644 --- a/tools/editor/cli_dispatch.cpp +++ b/tools/editor/cli_dispatch.cpp @@ -160,6 +160,7 @@ #include "cli_spell_variants_catalog.hpp" #include "cli_voiceovers_catalog.hpp" #include "cli_trade_rules_catalog.hpp" +#include "cli_word_filters_catalog.hpp" #include "cli_catalog_pluck.hpp" #include "cli_catalog_find.hpp" #include "cli_catalog_by_name.hpp" @@ -365,6 +366,7 @@ constexpr DispatchFn kDispatchTable[] = { handleSpellVariantsCatalog, handleVoiceoversCatalog, handleTradeRulesCatalog, + handleWordFiltersCatalog, handleCatalogPluck, handleCatalogFind, handleCatalogByName, diff --git a/tools/editor/cli_format_table.cpp b/tools/editor/cli_format_table.cpp index d0fd006f..7058967a 100644 --- a/tools/editor/cli_format_table.cpp +++ b/tools/editor/cli_format_table.cpp @@ -118,6 +118,7 @@ constexpr FormatMagicEntry kFormats[] = { {{'W','S','P','V'}, ".wspv", "spells", 
"--info-wspv", "Spell variant catalog"}, {{'W','V','O','X'}, ".wvox", "audio", "--info-wvox", "Voiceover audio catalog"}, {{'W','T','R','D'}, ".wtrd", "social", "--info-wtrd", "Trade window rules catalog"}, + {{'W','W','F','L'}, ".wwfl", "social", "--info-wwfl", "Word filter catalog"}, {{'W','F','A','C'}, ".wfac", "factions", nullptr, "Faction catalog"}, {{'W','L','C','K'}, ".wlck", "locks", nullptr, "Lock catalog"}, {{'W','S','K','L'}, ".wskl", "skills", nullptr, "Skill catalog"}, diff --git a/tools/editor/cli_help.cpp b/tools/editor/cli_help.cpp index 51da3582..f0a17ef5 100644 --- a/tools/editor/cli_help.cpp +++ b/tools/editor/cli_help.cpp @@ -2377,6 +2377,16 @@ void printUsage(const char* argv0) { std::printf(" Export binary .wtrd to a human-editable JSON sidecar (defaults to .wtrd.json; emits both ruleKind and targetingFilter as int + name string; goldEscrowMaxCopper as uint64)\n"); std::printf(" --import-wtrd-json [out-base]\n"); std::printf(" Import a .wtrd.json sidecar back into binary .wtrd (ruleKind int OR \"allowed\"/\"forbidden\"/\"soulboundexception\"/\"crossfactionallowed\"/\"levelgated\"/\"goldescrowmax\"/\"auditlogged\"; targetingFilter int OR \"anyplayer\"/\"samerealmonly\"/\"samefactiononly\"/\"sameaccountonly\"/\"gmonly\")\n"); + std::printf(" --gen-wfl [name]\n"); + std::printf(" Emit .wwfl 5 RMT/spam patterns (wts gold drop / wtb gold drop / g0ld typo-substitution replace / 1000g for drop / free gold mute) — non-profanity moderation only\n"); + std::printf(" --gen-wfl-caps [name]\n"); + std::printf(" Emit .wwfl 3 all-caps patterns (case-sensitive uppercase word / triple-exclamation / dollar-symbol spam)\n"); + std::printf(" --gen-wfl-url [name]\n"); + std::printf(" Emit .wwfl 3 URL-detection patterns (http:// / https:// / www. 
— replace with [link] placeholder)\n"); + std::printf(" --info-wwfl [--json]\n"); + std::printf(" Print WWFL entries (id / kind / severity / case-sensitive / pattern -> replacement / name)\n"); + std::printf(" --validate-wwfl [--json]\n"); + std::printf(" Static checks: id+name+pattern required, filterKind 0..5 OR 255 Misc, severity 0..3, no duplicate filterIds, no two filters with same pattern (preprocessor dispatch ambiguity); warns on Replace severity with empty replacement (would silently lose match — use Drop explicitly if intended)\n"); std::printf(" --catalog-pluck [--json]\n"); std::printf(" Extract one entry by id from any registered catalog format. Auto-detects magic, dispatches to the per-format --info-* handler internally, then prints just the matching entry. Primary-key field is auto-detected (first *Id field, or first numeric)\n"); std::printf(" --catalog-find [--magic ] [--json]\n"); diff --git a/tools/editor/cli_list_formats.cpp b/tools/editor/cli_list_formats.cpp index 93b24927..d34c0df6 100644 --- a/tools/editor/cli_list_formats.cpp +++ b/tools/editor/cli_list_formats.cpp @@ -140,6 +140,7 @@ constexpr FormatRow kFormats[] = { {"WSPV", ".wspv", "spells", "implicit Spell.dbc context overrides","Spell variant catalog (stance/talent/racial substitution)"}, {"WVOX", ".wvox", "audio", "CreatureTextSounds + per-quest voice","Voiceover audio catalog (per-NPC, per-event clips)"}, {"WTRD", ".wtrd", "social", "trade-window state machine policy", "Trade window rules catalog (P2P trade policy)"}, + {"WWFL", ".wwfl", "social", "chat preprocessor bad-word matcher", "Word filter catalog (spam/RMT/all-caps/URL)"}, // Additional pipeline catalogs without the alternating // gen/info/validate CLI surface (loaded by the engine diff --git a/tools/editor/cli_word_filters_catalog.cpp b/tools/editor/cli_word_filters_catalog.cpp new file mode 100644 index 00000000..731e7ea5 --- /dev/null +++ b/tools/editor/cli_word_filters_catalog.cpp @@ -0,0 +1,276 @@ +#include 
"cli_word_filters_catalog.hpp" +#include "cli_arg_parse.hpp" +#include "cli_box_emitter.hpp" + +#include "pipeline/wowee_word_filters.hpp" +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace wowee { +namespace editor { +namespace cli { + +namespace { + +std::string stripWwflExt(std::string base) { + stripExt(base, ".wwfl"); + return base; +} + +const char* filterKindName(uint8_t k) { + using F = wowee::pipeline::WoweeWordFilters; + switch (k) { + case F::Spam: return "spam"; + case F::GoldSeller: return "goldseller"; + case F::AllCaps: return "allcaps"; + case F::RepeatChar: return "repeatchar"; + case F::URL: return "url"; + case F::AdvertReward: return "advertreward"; + case F::Misc: return "misc"; + default: return "unknown"; + } +} + +const char* severityName(uint8_t s) { + using F = wowee::pipeline::WoweeWordFilters; + switch (s) { + case F::Warn: return "warn"; + case F::Replace: return "replace"; + case F::Drop: return "drop"; + case F::Mute: return "mute"; + default: return "unknown"; + } +} + +bool saveOrError(const wowee::pipeline::WoweeWordFilters& c, + const std::string& base, const char* cmd) { + if (!wowee::pipeline::WoweeWordFiltersLoader::save(c, base)) { + std::fprintf(stderr, "%s: failed to save %s.wwfl\n", + cmd, base.c_str()); + return false; + } + return true; +} + +void printGenSummary(const wowee::pipeline::WoweeWordFilters& c, + const std::string& base) { + std::printf("Wrote %s.wwfl\n", base.c_str()); + std::printf(" catalog : %s\n", c.name.c_str()); + std::printf(" filters : %zu\n", c.entries.size()); +} + +int handleGenSpam(int& i, int argc, char** argv) { + std::string base = argv[++i]; + std::string name = "SpamRMTFilters"; + if (parseOptArg(i, argc, argv)) name = argv[++i]; + base = stripWwflExt(base); + auto c = wowee::pipeline::WoweeWordFiltersLoader::makeSpamRMT(name); + if (!saveOrError(c, base, "gen-wfl")) return 1; + printGenSummary(c, base); + return 0; +} + +int handleGenCaps(int& i, 
int argc, char** argv) { + std::string base = argv[++i]; + std::string name = "AllCapsFilters"; + if (parseOptArg(i, argc, argv)) name = argv[++i]; + base = stripWwflExt(base); + auto c = wowee::pipeline::WoweeWordFiltersLoader::makeAllCaps(name); + if (!saveOrError(c, base, "gen-wfl-caps")) return 1; + printGenSummary(c, base); + return 0; +} + +int handleGenURL(int& i, int argc, char** argv) { + std::string base = argv[++i]; + std::string name = "URLDetectFilters"; + if (parseOptArg(i, argc, argv)) name = argv[++i]; + base = stripWwflExt(base); + auto c = wowee::pipeline::WoweeWordFiltersLoader::makeURLDetect(name); + if (!saveOrError(c, base, "gen-wfl-url")) return 1; + printGenSummary(c, base); + return 0; +} + +int handleInfo(int& i, int argc, char** argv) { + std::string base = argv[++i]; + bool jsonOut = consumeJsonFlag(i, argc, argv); + base = stripWwflExt(base); + if (!wowee::pipeline::WoweeWordFiltersLoader::exists(base)) { + std::fprintf(stderr, "WWFL not found: %s.wwfl\n", base.c_str()); + return 1; + } + auto c = wowee::pipeline::WoweeWordFiltersLoader::load(base); + if (jsonOut) { + nlohmann::json j; + j["wwfl"] = base + ".wwfl"; + j["name"] = c.name; + j["count"] = c.entries.size(); + nlohmann::json arr = nlohmann::json::array(); + for (const auto& e : c.entries) { + arr.push_back({ + {"filterId", e.filterId}, + {"name", e.name}, + {"description", e.description}, + {"pattern", e.pattern}, + {"replacement", e.replacement}, + {"filterKind", e.filterKind}, + {"filterKindName", filterKindName(e.filterKind)}, + {"severity", e.severity}, + {"severityName", severityName(e.severity)}, + {"caseSensitive", e.caseSensitive != 0}, + {"iconColorRGBA", e.iconColorRGBA}, + }); + } + j["entries"] = arr; + std::printf("%s\n", j.dump(2).c_str()); + return 0; + } + std::printf("WWFL: %s.wwfl\n", base.c_str()); + std::printf(" catalog : %s\n", c.name.c_str()); + std::printf(" filters : %zu\n", c.entries.size()); + if (c.entries.empty()) return 0; + std::printf(" id kind 
severity caseS pattern -> replacement name\n"); + for (const auto& e : c.entries) { + std::printf(" %4u %-11s %-7s %s '%s' -> '%s' %s\n", + e.filterId, filterKindName(e.filterKind), + severityName(e.severity), + e.caseSensitive ? "yes" : "no ", + e.pattern.c_str(), e.replacement.c_str(), + e.name.c_str()); + } + return 0; +} + +int handleValidate(int& i, int argc, char** argv) { + std::string base = argv[++i]; + bool jsonOut = consumeJsonFlag(i, argc, argv); + base = stripWwflExt(base); + if (!wowee::pipeline::WoweeWordFiltersLoader::exists(base)) { + std::fprintf(stderr, + "validate-wwfl: WWFL not found: %s.wwfl\n", + base.c_str()); + return 1; + } + auto c = wowee::pipeline::WoweeWordFiltersLoader::load(base); + std::vector errors; + std::vector warnings; + if (c.entries.empty()) { + warnings.push_back("catalog has zero entries"); + } + std::set idsSeen; + std::set patternsSeen; + for (size_t k = 0; k < c.entries.size(); ++k) { + const auto& e = c.entries[k]; + std::string ctx = "entry " + std::to_string(k) + + " (id=" + std::to_string(e.filterId); + if (!e.name.empty()) ctx += " " + e.name; + ctx += ")"; + if (e.filterId == 0) + errors.push_back(ctx + ": filterId is 0"); + if (e.name.empty()) + errors.push_back(ctx + ": name is empty"); + if (e.pattern.empty()) { + errors.push_back(ctx + + ": pattern is empty — filter would match " + "nothing (or every message, depending on " + "the matcher's empty-string semantics)"); + } + if (e.filterKind > 5 && e.filterKind != 255) { + errors.push_back(ctx + ": filterKind " + + std::to_string(e.filterKind) + + " out of range (must be 0..5 or 255 Misc)"); + } + if (e.severity > 3) { + errors.push_back(ctx + ": severity " + + std::to_string(e.severity) + + " out of range (must be 0..3)"); + } + // Per-severity validity: Replace severity REQUIRES + // a non-empty replacement (else the substitution + // would just delete the matched portion, which is + // Drop semantics). 
+ using F = wowee::pipeline::WoweeWordFilters; + if (e.severity == F::Replace && e.replacement.empty()) { + warnings.push_back(ctx + + ": Replace severity with empty " + "replacement — message would silently lose " + "the matched substring (effectively Drop " + "semantics for that span). Use severity=" + "Drop explicitly if that's the intent."); + } + // Pattern uniqueness — two filters with the same + // pattern would fire ambiguously. + if (!e.pattern.empty() && + !patternsSeen.insert(e.pattern).second) { + errors.push_back(ctx + + ": pattern '" + e.pattern + + "' already used by another filter — " + "preprocessor dispatch would be " + "non-deterministic"); + } + if (!idsSeen.insert(e.filterId).second) { + errors.push_back(ctx + ": duplicate filterId"); + } + } + bool ok = errors.empty(); + if (jsonOut) { + nlohmann::json j; + j["wwfl"] = base + ".wwfl"; + j["ok"] = ok; + j["errors"] = errors; + j["warnings"] = warnings; + std::printf("%s\n", j.dump(2).c_str()); + return ok ? 0 : 1; + } + std::printf("validate-wwfl: %s.wwfl\n", base.c_str()); + if (ok && warnings.empty()) { + std::printf(" OK — %zu filters, all filterIds + " + "patterns unique\n", c.entries.size()); + return 0; + } + if (!warnings.empty()) { + std::printf(" warnings (%zu):\n", warnings.size()); + for (const auto& w : warnings) + std::printf(" - %s\n", w.c_str()); + } + if (!errors.empty()) { + std::printf(" ERRORS (%zu):\n", errors.size()); + for (const auto& e : errors) + std::printf(" - %s\n", e.c_str()); + } + return ok ? 
0 : 1; +} + +} // namespace + +bool handleWordFiltersCatalog(int& i, int argc, char** argv, + int& outRc) { + if (std::strcmp(argv[i], "--gen-wfl") == 0 && i + 1 < argc) { + outRc = handleGenSpam(i, argc, argv); return true; + } + if (std::strcmp(argv[i], "--gen-wfl-caps") == 0 && i + 1 < argc) { + outRc = handleGenCaps(i, argc, argv); return true; + } + if (std::strcmp(argv[i], "--gen-wfl-url") == 0 && i + 1 < argc) { + outRc = handleGenURL(i, argc, argv); return true; + } + if (std::strcmp(argv[i], "--info-wwfl") == 0 && i + 1 < argc) { + outRc = handleInfo(i, argc, argv); return true; + } + if (std::strcmp(argv[i], "--validate-wwfl") == 0 && i + 1 < argc) { + outRc = handleValidate(i, argc, argv); return true; + } + return false; +} + +} // namespace cli +} // namespace editor +} // namespace wowee diff --git a/tools/editor/cli_word_filters_catalog.hpp b/tools/editor/cli_word_filters_catalog.hpp new file mode 100644 index 00000000..d3af978e --- /dev/null +++ b/tools/editor/cli_word_filters_catalog.hpp @@ -0,0 +1,12 @@ +#pragma once + +namespace wowee { +namespace editor { +namespace cli { + +bool handleWordFiltersCatalog(int& i, int argc, char** argv, + int& outRc); + +} // namespace cli +} // namespace editor +} // namespace wowee