refactor: derive turtle opcodes from classic

This commit is contained in:
Kelsi 2026-03-15 02:55:05 -07:00
parent 0b6265bc55
commit 6ede9a2968
12 changed files with 428 additions and 394 deletions

View file

@ -0,0 +1,175 @@
#!/usr/bin/env python3
"""
Report the semantic opcode diff between the Classic and Turtle expansion maps.
The report normalizes:
- hex formatting differences (0x67 vs 0x067)
- alias names that collapse to the same canonical opcode
It highlights:
- true wire differences for the same canonical opcode
- canonical opcodes present only in Classic or only in Turtle
- name-only differences where the wire matches after aliasing
"""
from __future__ import annotations
import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
from opcode_map_utils import load_opcode_map
# Matches a whole opcode name: a CMSG_, SMSG_ or MSG_ prefix followed by one or
# more uppercase letters, digits or underscores. load_map() uses it to skip
# keys that are not opcode names.
RE_OPCODE_NAME = re.compile(r"^(?:CMSG|SMSG|MSG)_[A-Z0-9_]+$")
def read_aliases(path: Path) -> Dict[str, str]:
    """Load the opcode alias table from a JSON file.

    The file is expected to hold a JSON object whose ``"aliases"`` member maps
    a name to the name it should be replaced with. Entries whose value is not
    a string are skipped. A missing or malformed ``"aliases"`` member (or a
    top-level value that is not an object) yields an empty table instead of
    an ``AttributeError``.

    Args:
        path: Location of the alias JSON file.

    Returns:
        Mapping of alias name to replacement name.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    data = json.loads(path.read_text(encoding="utf-8"))
    aliases = data.get("aliases") if isinstance(data, dict) else None
    if not isinstance(aliases, dict):
        return {}
    return {
        key: value
        for key, value in aliases.items()
        if isinstance(key, str) and isinstance(value, str)
    }
def canonicalize(name: str, aliases: Dict[str, str]) -> str:
    """Follow the alias chain starting at ``name`` and return where it ends.

    Resolution stops at the first name that has no alias, or at the first
    name that has already been visited, so a cyclic alias table cannot loop
    forever.

    Args:
        name: Name to resolve.
        aliases: Mapping of name to replacement name.

    Returns:
        The name reached when the walk stops; ``name`` itself when it has no
        alias.
    """
    visited = set()
    resolved = name
    while True:
        # Stop on a terminal name or when the chain loops back on itself.
        if resolved in visited or resolved not in aliases:
            return resolved
        visited.add(resolved)
        resolved = aliases[resolved]
def load_map(path: Path) -> Dict[str, int]:
    """Load an expansion opcode map as ``{opcode name: wire value}``.

    The raw map comes from ``load_opcode_map``. Only entries whose key matches
    ``RE_OPCODE_NAME`` and whose value is a string starting with ``0x``
    (case-insensitive) are kept; the value is parsed as hexadecimal.

    Args:
        path: Location of the expansion's opcode JSON file.

    Returns:
        Mapping of opcode name to integer wire value.
    """
    # NOTE(review): load_opcode_map stringifies integer JSON values in decimal,
    # so such entries never carry the "0x" prefix and are skipped here.
    wires: Dict[str, int] = {}
    for name, text in load_opcode_map(path).items():
        is_opcode = isinstance(name, str) and RE_OPCODE_NAME.match(name)
        is_hex = isinstance(text, str) and text.lower().startswith("0x")
        if is_opcode and is_hex:
            wires[name] = int(text, 16)
    return wires
@dataclass(frozen=True)
class CanonicalEntry:
    """Immutable record for one canonical opcode within one expansion map."""

    # Opcode name after alias resolution.
    canonical_name: str
    # Wire value carried by the raw names grouped under this entry.
    raw_value: int
    # Raw names from the map that resolve to canonical_name
    # (build_canonical_entries fills this in sorted order).
    raw_names: Tuple[str, ...]
def build_canonical_entries(
    raw_map: Dict[str, int], aliases: Dict[str, str]
) -> Dict[str, CanonicalEntry]:
    """Collapse a raw opcode map into one entry per canonical name.

    Every raw name is resolved through ``canonicalize``; names that resolve to
    the same canonical name are grouped, and each group must agree on a single
    wire value.

    Args:
        raw_map: Mapping of raw opcode name to wire value.
        aliases: Alias table passed to ``canonicalize``.

    Returns:
        Mapping of canonical name to its ``CanonicalEntry``.

    Raises:
        ValueError: If the raw names of one canonical opcode carry more than
            one distinct wire value.
    """
    members_by_canonical: Dict[str, List[Tuple[str, int]]] = {}
    for alias_name, wire in raw_map.items():
        key = canonicalize(alias_name, aliases)
        if key not in members_by_canonical:
            members_by_canonical[key] = []
        members_by_canonical[key].append((alias_name, wire))

    result: Dict[str, CanonicalEntry] = {}
    for canonical_name, entries in members_by_canonical.items():
        distinct_wires = {wire for _, wire in entries}
        if len(distinct_wires) != 1:
            formatted = ", ".join(
                f"{name}=0x{raw_value:03X}" for name, raw_value in sorted(entries)
            )
            raise ValueError(
                f"Expansion map contains multiple wires for canonical opcode "
                f"{canonical_name}: {formatted}"
            )
        # Exactly one element is left in the set at this point.
        (only_wire,) = distinct_wires
        member_names = tuple(sorted(member for member, _ in entries))
        result[canonical_name] = CanonicalEntry(
            canonical_name, only_wire, member_names
        )
    return result
def format_hex(raw_value: int) -> str:
    """Render a wire value as ``0x`` plus uppercase hex, zero-padded to 3 digits."""
    digits = format(raw_value, "03X")
    return "0x" + digits
def emit_section(title: str, rows: Iterable[str], limit: int | None) -> None:
    """Print one report section to standard output.

    The header line is ``<title>: <row count>``. When there are rows, up to
    ``limit`` of them follow, each on its own line, and a trailing
    ``... N more`` line reports how many were left out.

    Args:
        title: Section heading.
        rows: Pre-formatted row strings; consumed once.
        limit: Maximum number of rows to print, or ``None`` to print all.
    """
    lines = list(rows)
    total = len(lines)
    print(f"{title}: {total}")
    if total == 0:
        return

    if limit is None:
        visible = lines
    else:
        visible = lines[:limit]
    for row in visible:
        print(f" {row}")

    if limit is not None and total > limit:
        print(f" ... {total - limit} more")
def main() -> int:
    """Print the canonical opcode diff between the Classic and Turtle maps.

    Command-line options:
        --root   Directory containing the ``Data`` tree (default ``.``).
        --limit  Maximum rows printed per section; a negative value disables
                 the limit (default 80).

    Returns:
        Always 0.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--root", default=".")
    cli.add_argument(
        "--limit",
        type=int,
        default=80,
        help="Maximum rows to print per section; use -1 for no limit.",
    )
    options = cli.parse_args()
    # Any negative --limit means "print everything".
    limit = options.limit if options.limit >= 0 else None

    root = Path(options.root).resolve()
    aliases = read_aliases(root / "Data/opcodes/aliases.json")
    classic_raw = load_map(root / "Data/expansions/classic/opcodes.json")
    turtle_raw = load_map(root / "Data/expansions/turtle/opcodes.json")
    classic = build_canonical_entries(classic_raw, aliases)
    turtle = build_canonical_entries(turtle_raw, aliases)

    classic_names = set(classic)
    turtle_names = set(turtle)
    shared_names = classic_names & turtle_names

    # Canonical opcodes present in both maps: compare the wire value first,
    # and only look at the raw-name lists when the wire matches.
    wire_mismatches = []
    name_mismatches = []
    for canonical_name in sorted(shared_names):
        c = classic[canonical_name]
        t = turtle[canonical_name]
        if c.raw_value != t.raw_value:
            wire_mismatches.append(
                f"{canonical_name}: classic={format_hex(c.raw_value)} "
                f"turtle={format_hex(t.raw_value)}"
            )
            continue
        if c.raw_names != t.raw_names:
            name_mismatches.append(
                f"{canonical_name}: wire={format_hex(c.raw_value)} "
                f"classic_names={list(c.raw_names)} turtle_names={list(t.raw_names)}"
            )

    # Canonical opcodes that exist on one side only.
    classic_only = []
    for name in sorted(classic_names - turtle_names):
        classic_only.append(
            f"{name}: {format_hex(classic[name].raw_value)} names={list(classic[name].raw_names)}"
        )
    turtle_only = []
    for name in sorted(turtle_names - classic_names):
        turtle_only.append(
            f"{name}: {format_hex(turtle[name].raw_value)} names={list(turtle[name].raw_names)}"
        )

    print(f"classic canonical entries: {len(classic)}")
    print(f"turtle canonical entries: {len(turtle)}")
    print(f"shared canonical entries: {len(shared_names)}")

    sections = (
        ("Different wire", wire_mismatches),
        ("Classic only", classic_only),
        ("Turtle only", turtle_only),
        ("Same wire, name-only differences", name_mismatches),
    )
    for title, rows in sections:
        # A blank line separates each section from whatever precedes it.
        print()
        emit_section(title, rows, limit)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

46
tools/opcode_map_utils.py Normal file
View file

@ -0,0 +1,46 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
import re
from pathlib import Path
from typing import Dict, Set
# Matches a whole opcode name: a CMSG_, SMSG_ or MSG_ prefix followed by one or
# more uppercase letters, digits or underscores.
RE_OPCODE_NAME = re.compile(r"^(?:CMSG|SMSG|MSG)_[A-Z0-9_]+$")


def _load_opcode_map_chain(path: Path, chain: tuple[Path, ...]) -> Dict[str, str]:
    """Load ``path`` and its parents; ``chain`` lists the files already open, in order."""
    path = path.resolve()
    if path in chain:
        # The chain is an ordered tuple, so the message shows the real
        # inheritance path that led back to an already-open file.
        trail = " -> ".join(str(p) for p in chain + (path,))
        raise ValueError(f"Opcode map inheritance cycle: {trail}")

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    data = json.loads(path.read_text(encoding="utf-8"))
    merged: Dict[str, str] = {}

    # Start from the parent map, if one is named (relative to this file).
    extends = data.get("_extends")
    if isinstance(extends, str) and extends:
        merged.update(_load_opcode_map_chain(path.parent / extends, chain + (path,)))

    # Drop inherited names before applying this file's own entries, so a name
    # that is both removed and redefined here keeps the local definition.
    remove = data.get("_remove", [])
    if isinstance(remove, list):
        for name in remove:
            if isinstance(name, str):
                merged.pop(name, None)

    for key, value in data.items():
        if not isinstance(key, str) or not RE_OPCODE_NAME.match(key):
            continue
        if isinstance(value, str):
            merged[key] = value
        elif isinstance(value, int):
            merged[key] = str(value)
    return merged


def load_opcode_map(path: Path, _seen: Set[Path] | None = None) -> Dict[str, str]:
    """Load an opcode map JSON file, resolving ``_extends`` inheritance.

    A file may name a parent map through a string ``_extends`` member (a path
    relative to the file itself). The parent is loaded first, names listed in
    ``_remove`` are dropped from the inherited result, and then every key of
    this file that matches ``RE_OPCODE_NAME`` is applied on top. String values
    are kept as-is; integer values are stored as their decimal string.

    Args:
        path: Opcode map file to load.
        _seen: Internal. Resolved paths to treat as already being loaded; a
            file found in it is reported as a cycle. The set is not modified.

    Returns:
        Mapping of opcode name to its value string.

    Raises:
        ValueError: If the ``_extends`` chain revisits a file. The message
            lists the files in the order they were opened.
        OSError: If a file in the chain cannot be read.
        json.JSONDecodeError: If a file in the chain is not valid JSON.
    """
    chain = tuple(_seen) if _seen else ()
    return _load_opcode_map_chain(path, chain)

View file

@ -17,6 +17,8 @@ import re
from pathlib import Path
from typing import Dict, Iterable, List, Set
from opcode_map_utils import load_opcode_map
RE_OPCODE_NAME = re.compile(r"^(?:CMSG|SMSG|MSG)_[A-Z0-9_]+$")
RE_CODE_REF = re.compile(r"\bOpcode::((?:CMSG|SMSG|MSG)_[A-Z0-9_]+)\b")
@ -53,12 +55,8 @@ def iter_expansion_files(expansions_dir: Path) -> Iterable[Path]:
def load_expansion_names(path: Path) -> Dict[str, str]:
data = json.loads(path.read_text())
out: Dict[str, str] = {}
for k, v in data.items():
if RE_OPCODE_NAME.match(k):
out[k] = str(v)
return out
data = load_opcode_map(path)
return {k: str(v) for k, v in data.items() if RE_OPCODE_NAME.match(k)}
def collect_code_refs(root: Path) -> Set[str]: