feat(binana): add tokens database
Some checks failed
Push / build (push) Has been cancelled

This commit is contained in:
phaneron 2026-03-20 01:58:16 -04:00
parent ac268a16c8
commit 2c2815ab0b
22 changed files with 2122 additions and 2 deletions

303
go/app/util/make-tokens.go Normal file
View file

@ -0,0 +1,303 @@
package util
import (
"compress/gzip"
"debug/macho"
"encoding/json"
"fmt"
"os"
"path/filepath"
"slices"
"strings"
"time"
"github.com/thunderbrewhq/binana/go/app"
"github.com/thunderbrewhq/binana/go/app/util/dbutil"
"github.com/thunderbrewhq/binana/go/db"
"github.com/thunderbrewhq/binana/go/pdbconv"
"github.com/thunderbrewhq/binana/go/stringrecovery"
)
type MakeTokenDatabaseParams struct {
Source string
Output string
Format dbutil.DatabaseFormat
}
func MakeTokenDatabase(params *MakeTokenDatabaseParams) {
var (
tokens_database tokens_database
err error
)
if err = tokens_database.Open(params.Output, params.Format); err != nil {
app.Fatal(err)
}
if err = tokens_database.make(params.Source); err != nil {
app.Fatal(err)
}
if err = tokens_database.Close(); err != nil {
app.Fatal(err)
}
}
type tokens_database struct {
sequence uint64
writer *dbutil.Writer[db.Token]
}
func (tokens_database *tokens_database) next_token_id() (id uint64) {
id = tokens_database.sequence
tokens_database.sequence++
return
}
func (tokens_database *tokens_database) Open(name string, format dbutil.DatabaseFormat) (err error) {
tokens_database.sequence = 1
tokens_database.writer, err = dbutil.Open[db.Token](name, format)
return
}
func (tokens_database *tokens_database) Close() (err error) {
err = tokens_database.writer.Close()
return
}
func (tokens_database *tokens_database) Write(token *db.Token) (err error) {
tokens := []db.Token{*token}
if err = tokens_database.writer.WriteEntries(tokens); err != nil {
return
}
return
}
func (tokens_database *tokens_database) make_file_pdb(name string) (err error) {
exe_name := strings.TrimSuffix(name, ".pdb") + ".exe"
var base_address uint64
base_address, err = get_exe_base_address(exe_name)
if err != nil {
return
}
fmt.Fprintln(os.Stderr, "[pdb]", name)
var source_id string
source_id, err = hash_file(name)
if err != nil {
return
}
fmt.Fprintln(os.Stderr, "[pdb]", source_id)
// check for the existence of an alternate, .pdb.json.gz file
_, err = os.Stat(name + ".json.gz")
if err != nil {
return
}
var (
gzip_file *os.File
gzip_reader *gzip.Reader
)
gzip_file, err = os.Open(name + ".json.gz")
if err != nil {
return
}
var pdb pdbconv.ProgramDatabase
gzip_reader, err = gzip.NewReader(gzip_file)
json_decoder := json.NewDecoder(gzip_reader)
if err = json_decoder.Decode(&pdb); err != nil {
return
}
gzip_file.Close()
var v pdb_token_visitor
v.init(tokens_database, source_id, base_address)
if err = v.visit_all(&pdb); err != nil {
return
}
if err = v.write_tokens(); err != nil {
return
}
return
}
func (tokens_database *tokens_database) write_string_token(source_id string, section_name string, address uint64, str string) (err error) {
var db_token db.Token
db_token.ID = tokens_database.next_token_id()
db_token.Source = source_id
db_token.Section = section_name
db_token.Kind = db.OriginalStringToken
db_token.Offset = fmt.Sprintf("%X", address)
db_token.Names = append(db_token.Names, db.TokenName{db.OriginalName, str})
// detect if this is a mangled type identifier
if looks_mangled(str) {
demangled, err := demangle(str)
if err == nil {
db_token.Names = append(db_token.Names, db.TokenName{db.DemangledName, demangled})
}
}
err = tokens_database.Write(&db_token)
return
}
func (tokens_database *tokens_database) make_file_pe(name string) (err error) {
var id string
id, err = hash_file(name)
if err != nil {
return
}
err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) {
fmt.Fprintf(os.Stderr, "[pe] string found: %s %08X %s\n", section_name, address, str)
tokens_database.write_string_token(id, section_name, address, str)
})
return
}
func (tokens_database *tokens_database) make_file_macho(name string) (err error) {
fmt.Fprintln(os.Stderr, "[mach-o]", name)
var source_id string
source_id, err = hash_file(name)
if err != nil {
return
}
fmt.Fprintln(os.Stderr, "[mach-o]", source_id)
var (
file *os.File
macho_file *macho.File
)
file, err = os.Open(name)
if err != nil {
return
}
macho_file, err = macho.NewFile(file)
if err != nil {
return
}
_, dwarf_err := macho_file.DWARF()
if dwarf_err == nil {
fmt.Fprintln(os.Stderr, "DWARF!")
time.Sleep(5 * time.Second)
}
fmt.Fprintln(os.Stderr, "[mach-o]", "cpu", macho_file.FileHeader.Cpu)
fmt.Fprintln(os.Stderr, "[mach-o]", "loads:")
// for _, load := range macho_file.Loads {
// fmt.Fprintln(os.Stderr, "[mach-o]", load.String())
// }
fmt.Fprintln(os.Stderr, "[mach-o]", "sections:")
for _, section := range macho_file.Sections {
fmt.Fprintln(os.Stderr, "section", section.SectionHeader.Name)
}
if macho_file.Dysymtab != nil {
fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a dysymtab")
} else {
fmt.Fprintln(os.Stderr, "[mach-o]", "dysymtab:")
}
var imported_symbols []string
imported_symbols, err = macho_file.ImportedSymbols()
if err != nil {
return
}
if macho_file.Symtab == nil {
fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a symtab")
} else {
fmt.Fprintln(os.Stderr, "[mach-o]", "symtab:")
for _, sym := range macho_file.Symtab.Syms {
imported := slices.Contains(imported_symbols, sym.Name)
var section_name string
if sym.Sect != 0 {
section_name = macho_file.Sections[sym.Sect-1].SectionHeader.Name
}
if imported {
fmt.Fprintf(os.Stderr, "[mach-o] imported %s %02x %s\n", section_name, sym.Type, sym.Name)
} else {
if sym.Name == "" {
// fmt.Fprintln(os.Stderr, "[mach-o]", "symbol has no name", "sect="+section_name, sym.Type, sym.Value, sym.Desc)
} else {
fmt.Fprintf(os.Stderr, "[mach-o] internal %s %02x %s\n", section_name, sym.Type, sym.Name)
var token db.Token
token.ID = tokens_database.next_token_id()
token.Source = source_id
token.Kind = db.OriginalSymbolToken
token.Section = section_name
token.Offset = fmt.Sprintf("%X", sym.Value)
token.Names = append(token.Names, db.TokenName{db.OriginalName, sym.Name})
if looks_mangled(sym.Name) {
demangled, err := demangle(sym.Name)
if err == nil {
token.Names = append(token.Names, db.TokenName{db.DemangledName, demangled})
}
}
tokens_database.Write(&token)
}
}
}
}
file.Close()
if err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) {
fmt.Fprintf(os.Stderr, "[mach-o] string found: %s %08X %s\n", section_name, address, str)
tokens_database.write_string_token(source_id, section_name, address, str)
}); err != nil {
return
}
return
}
func (tokens_database *tokens_database) make_file(name string) (err error) {
switch filepath.Ext(name) {
case ".macho":
err = tokens_database.make_file_macho(name)
case ".pdb":
err = tokens_database.make_file_pdb(name)
case ".exe":
err = tokens_database.make_file_pe(name)
}
return
}
func (tokens_database *tokens_database) make_directory(name string) (err error) {
var entries []os.DirEntry
entries, err = os.ReadDir(name)
if err != nil {
return
}
for _, entry := range entries {
if entry.IsDir() {
if err = tokens_database.make_directory(filepath.Join(name, entry.Name())); err != nil {
return
}
} else {
if err = tokens_database.make_file(filepath.Join(name, entry.Name())); err != nil {
return
}
}
}
return
}
func (tokens_database *tokens_database) make(name string) (err error) {
var fi os.FileInfo
fi, err = os.Stat(name)
if err != nil {
return
}
if fi.IsDir() {
err = tokens_database.make_directory(name)
} else {
err = tokens_database.make_file(name)
}
return
}