Opcode registry: move to generated canonical+alias pipeline

Introduce data-driven opcode registry with canonical and alias sources:

- Added Data/opcodes/canonical.json as the single canonical LogicalOpcode set.

- Added Data/opcodes/aliases.json for cross-core naming aliases (CMaNGOS/AzerothCore/local legacy).

Added generator and generated include fragments:

- tools/gen_opcode_registry.py emits include/game/opcode_enum_generated.inc, include/game/opcode_names_generated.inc, and include/game/opcode_aliases_generated.inc.

- include/game/opcode_table.hpp now consumes generated enum entries.

- src/game/opcode_table.cpp now consumes generated name and alias tables.

Loader canonicalization behavior:

- OpcodeTable::nameToLogical canonicalizes incoming JSON opcode names via alias table before enum lookup, so implementation code stays stable while expansion maps can use different core spellings.

Validation and build integration:

- Added tools/validate_opcode_maps.py to validate canonical contract across expansions.

- Added CMake targets opcodes-generate and opcodes-validate.

- wowee target now depends on opcodes-generate so generated headers stay current.

Validation/build run:

- cmake -S . -B build

- cmake --build build --target opcodes-generate opcodes-validate

- cmake --build build -j32
This commit is contained in:
Kelsi 2026-02-20 03:02:31 -08:00
parent 52ac3bcba3
commit 610ed71922
10 changed files with 4572 additions and 2812 deletions

View file

@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""
Generate opcode registry include fragments from data files.
Inputs:
- Data/opcodes/canonical.json
- Data/opcodes/aliases.json
Outputs:
- include/game/opcode_enum_generated.inc
- include/game/opcode_names_generated.inc
- include/game/opcode_aliases_generated.inc
"""
from __future__ import annotations
import json
import re
from pathlib import Path
RE_NAME = re.compile(r"^(?:CMSG|SMSG|MSG)_[A-Z0-9_]+$")
def load_canonical(path: Path) -> list[str]:
data = json.loads(path.read_text())
names = data.get("logical_opcodes", [])
if not isinstance(names, list):
raise ValueError("canonical.json: logical_opcodes must be a list")
out: list[str] = []
seen: set[str] = set()
for raw in names:
if not isinstance(raw, str) or not RE_NAME.match(raw):
raise ValueError(f"Invalid canonical opcode name: {raw!r}")
if raw in seen:
continue
seen.add(raw)
out.append(raw)
return out
def load_aliases(path: Path, canonical: set[str]) -> dict[str, str]:
data = json.loads(path.read_text())
aliases = data.get("aliases", {})
if not isinstance(aliases, dict):
raise ValueError("aliases.json: aliases must be an object")
out: dict[str, str] = {}
for alias, target in sorted(aliases.items()):
if not isinstance(alias, str) or not RE_NAME.match(alias):
raise ValueError(f"Invalid alias opcode name: {alias!r}")
if not isinstance(target, str) or not RE_NAME.match(target):
raise ValueError(f"Invalid alias target opcode name: {target!r}")
if target not in canonical:
raise ValueError(f"Alias target not in canonical set: {alias} -> {target}")
out[alias] = target
return out
def write_file(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(content)
def main() -> int:
root = Path(__file__).resolve().parent.parent
data_dir = root / "Data/opcodes"
inc_dir = root / "include/game"
canonical_names = load_canonical(data_dir / "canonical.json")
canonical_set = set(canonical_names)
aliases = load_aliases(data_dir / "aliases.json", canonical_set)
enum_lines = ["// GENERATED FILE - DO NOT EDIT", ""]
enum_lines += [f" {name}," for name in canonical_names]
enum_content = "\n".join(enum_lines) + "\n"
name_lines = ["// GENERATED FILE - DO NOT EDIT", ""]
name_lines += [f' {{"{name}", LogicalOpcode::{name}}},' for name in canonical_names]
names_content = "\n".join(name_lines) + "\n"
alias_lines = ["// GENERATED FILE - DO NOT EDIT", ""]
alias_lines += [f' {{"{alias}", "{target}"}},' for alias, target in aliases.items()]
aliases_content = "\n".join(alias_lines) + "\n"
write_file(inc_dir / "opcode_enum_generated.inc", enum_content)
write_file(inc_dir / "opcode_names_generated.inc", names_content)
write_file(inc_dir / "opcode_aliases_generated.inc", aliases_content)
print(
f"generated: canonical={len(canonical_names)} aliases={len(aliases)} "
f"-> include/game/opcode_*_generated.inc"
)
return 0
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -0,0 +1,171 @@
#!/usr/bin/env python3
"""
Validate opcode canonicalization and expansion mappings.
Checks:
1. Every enum opcode appears in kOpcodeNames.
2. Every expansion JSON key resolves to a canonical opcode name (direct or alias).
3. Every opcode referenced as Opcode::<NAME> in implementation code exists in each expansion map
after alias canonicalization.
"""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from typing import Dict, Iterable, List, Set
RE_OPCODE_NAME = re.compile(r"^(?:CMSG|SMSG|MSG)_[A-Z0-9_]+$")
RE_CODE_REF = re.compile(r"\bOpcode::((?:CMSG|SMSG|MSG)_[A-Z0-9_]+)\b")
def read_canonical_data(path: Path) -> Set[str]:
data = json.loads(path.read_text())
names = data.get("logical_opcodes", [])
return {n for n in names if isinstance(n, str) and RE_OPCODE_NAME.match(n)}
def read_alias_data(path: Path) -> Dict[str, str]:
data = json.loads(path.read_text())
aliases = data.get("aliases", {})
out: Dict[str, str] = {}
for k, v in aliases.items():
if isinstance(k, str) and isinstance(v, str) and RE_OPCODE_NAME.match(k) and RE_OPCODE_NAME.match(v):
out[k] = v
return out
def canonicalize(name: str, aliases: Dict[str, str]) -> str:
seen: Set[str] = set()
current = name
while current in aliases and current not in seen:
seen.add(current)
current = aliases[current]
return current
def iter_expansion_files(expansions_dir: Path) -> Iterable[Path]:
for p in sorted(expansions_dir.glob("*/opcodes.json")):
yield p
def load_expansion_names(path: Path) -> Dict[str, str]:
data = json.loads(path.read_text())
out: Dict[str, str] = {}
for k, v in data.items():
if RE_OPCODE_NAME.match(k):
out[k] = str(v)
return out
def collect_code_refs(root: Path) -> Set[str]:
refs: Set[str] = set()
skip_suffixes = {
"include/game/opcode_table.hpp",
"src/game/opcode_table.cpp",
}
for p in list(root.glob("src/**/*.cpp")) + list(root.glob("include/**/*.hpp")):
rel = p.as_posix()
if rel in skip_suffixes:
continue
text = p.read_text(errors="ignore")
for m in RE_CODE_REF.finditer(text):
refs.add(m.group(1))
return refs
def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument("--root", default=".")
parser.add_argument(
"--strict-required",
action="store_true",
help="Fail when expansion maps miss opcodes referenced by implementation code.",
)
args = parser.parse_args()
root = Path(args.root).resolve()
canonical_path = root / "Data/opcodes/canonical.json"
aliases_path = root / "Data/opcodes/aliases.json"
expansions_dir = root / "Data/expansions"
enum_names = read_canonical_data(canonical_path)
aliases = read_alias_data(aliases_path)
k_names = set(enum_names)
code_refs = collect_code_refs(root)
problems: List[str] = []
missing_in_name_map = sorted(enum_names - k_names)
if missing_in_name_map:
problems.append(
f"enum names missing from kOpcodeNames: {len(missing_in_name_map)} "
f"(sample: {missing_in_name_map[:10]})"
)
unknown_code_refs = sorted(r for r in code_refs if canonicalize(r, aliases) not in enum_names)
if unknown_code_refs:
problems.append(
f"Opcode:: references not in enum/alias map: {len(unknown_code_refs)} "
f"(sample: {unknown_code_refs[:10]})"
)
print(f"Canonical enum names: {len(enum_names)}")
print(f"kOpcodeNames entries: {len(k_names)}")
print(f"Alias entries: {len(aliases)}")
print(f"Opcode:: code references: {len(code_refs)}")
for exp_file in iter_expansion_files(expansions_dir):
names = load_expansion_names(exp_file)
canonical_names = {canonicalize(n, aliases) for n in names}
unknown = sorted(n for n in canonical_names if n not in enum_names)
missing_required = sorted(
n for n in code_refs if canonicalize(n, aliases) not in canonical_names
)
# Detect multiple raw names collapsing to one canonical name.
collisions: Dict[str, List[str]] = {}
for raw in names:
c = canonicalize(raw, aliases)
collisions.setdefault(c, []).append(raw)
alias_collisions = sorted(
(c, raws) for c, raws in collisions.items() if len(raws) > 1 and len(set(raws)) > 1
)
print(
f"[{exp_file.parent.name}] raw={len(names)} canonical={len(canonical_names)} "
f"unknown={len(unknown)} missing_required={len(missing_required)} "
f"alias_collisions={len(alias_collisions)}"
)
if unknown:
problems.append(
f"{exp_file.parent.name}: unknown canonical names after aliasing: "
f"{len(unknown)} (sample: {unknown[:10]})"
)
if missing_required and args.strict_required:
problems.append(
f"{exp_file.parent.name}: missing required opcodes from implementation refs: "
f"{len(missing_required)} (sample: {missing_required[:10]})"
)
elif missing_required:
print(
f" warn: {exp_file.parent.name} missing required refs: "
f"{len(missing_required)} (sample: {missing_required[:6]})"
)
if problems:
print("\nFAILED:")
for p in problems:
print(f"- {p}")
return 1
print("\nOK: canonical opcode contract satisfied across expansions.")
return 0
if __name__ == "__main__":
raise SystemExit(main())