binana/go/app/util/query.go
superp00t 2c2815ab0b
Some checks failed
Push / build (push) Has been cancelled
feat(binana): add tokens database
2026-03-20 01:58:16 -04:00

189 lines
4.2 KiB
Go

package util
import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"regexp"
	"slices"

	"github.com/parquet-go/parquet-go"
	"github.com/thunderbrewhq/binana/go/app"
	"github.com/thunderbrewhq/binana/go/db"
)
// QueryPresentationMode selects how a matched token is written to stdout.
type QueryPresentationMode uint8
const (
	// PresentQueryNormal prints the token kind, its sample and section, and
	// every name recorded for the token.
	PresentQueryNormal QueryPresentationMode = iota
	// PresentQueryNameOnly prints only the names that match the token
	// pattern, one per line.
	PresentQueryNameOnly
)
// QueryParams describes a token search: which samples to consider and which
// token names to report.
type QueryParams struct {
	// How matched tokens are printed
	Present QueryPresentationMode
	// Match pattern for profile
	// NOTE(review): not read by the query code in this file — confirm where it is used
	Profile string
	// Possible values for Program; an empty list disables this filter
	Program []string
	// Possible values for OS; an empty list disables this filter
	OS []string
	// Possible values for arch; an empty list disables this filter
	Arch []string
	// Range of builds to return information for (both bounds inclusive).
	// A sample whose build is above MaxBuild is skipped, so a zero MaxBuild
	// excludes every sample with a non-zero build.
	MinBuild uint32
	MaxBuild uint32
	// Regular expression for tokens (symbols/type information), compiled
	// with POSIX semantics and matched against each token name
	Token string
}
// token_query holds the state of a single Query run.
type token_query struct {
	// search parameters supplied by the caller of Query
	params *QueryParams
	// samples indexed by sample ID; tokens are resolved through their Source field
	sample_database map[string]db.Sample
	// compiled form of params.Token
	token_regexp *regexp.Regexp
}
// present_token writes a matched token to stdout.
//
// In PresentQueryNameOnly mode only the names that match the token pattern
// are printed, one per line. Otherwise a header line (kind, abbreviated
// sample ID, section and, when set, offset and datatype) is printed, followed
// by every name of the token. Tokens of an unrecognised kind are skipped
// silently; a name of an unrecognised kind panics.
func (token_query *token_query) present_token(token *db.Token) {
	if token_query.params.Present == PresentQueryNameOnly {
		for _, name := range token.Names {
			if token_query.token_regexp.MatchString(name.Name) {
				fmt.Println(name.Name)
			}
		}
		return
	}

	kind_name := ""
	switch token.Kind {
	case db.OriginalConstantToken:
		kind_name = "original constant"
	case db.OriginalDatatypeToken:
		kind_name = "original datatype"
	case db.OriginalStringToken:
		kind_name = "original string"
	case db.OriginalSymbolToken:
		kind_name = "original symbol"
	default:
		return
	}

	// Only the first 8 bytes of the sample ID are shown. Clamp instead of
	// slicing blindly so that a shorter ID is printed whole rather than
	// causing an out-of-range panic.
	sample_id := token.Source
	if len(sample_id) > 8 {
		sample_id = sample_id[:8]
	}

	fmt.Printf("%s in sample: '%s' section: '%s'", kind_name, sample_id, token.Section)
	if token.Offset != "" {
		fmt.Printf(" at %s", token.Offset)
	}
	if token.Datatype != "" {
		fmt.Printf(" with datatype: '%s'", token.Datatype)
	}
	fmt.Printf("\n")

	fmt.Printf("names:\n")
	for _, name := range token.Names {
		name_kind_name := ""
		switch name.Kind {
		case db.OriginalName:
			name_kind_name = "original name"
		case db.DemangledName:
			name_kind_name = "demangled name"
		case db.BinanaizedName:
			name_kind_name = "binanaized name"
		default:
			// an unknown name kind is treated as a broken invariant
			panic(name.Kind)
		}
		fmt.Printf("%s '%s'\n", name_kind_name, name.Name)
	}
	fmt.Printf("--\n\n")
}
// match_token checks a token against the query filters and, when at least one
// of its names matches the token pattern, prints it to stdout.
//
// A token whose Source is missing from the sample database is dumped to stderr
// as JSON and reported through app.Fatal. Tokens from samples outside the
// requested build range, or whose program/OS/arch is not in a non-empty filter
// list, are ignored.
//
// The result tells the caller whether to halt the search; the current
// implementation never asks for that and always returns false.
func (token_query *token_query) match_token(token *db.Token) (quit bool) {
	filters := token_query.params

	sample, known := token_query.sample_database[token.Source]
	if !known {
		dump, err := json.Marshal(token)
		if err != nil {
			panic(err)
		}
		fmt.Fprintln(os.Stderr, string(dump))
		app.Fatal(fmt.Errorf("a token references a sample (%s) that does not exist in the sample database. please fix your database", token.Source))
		return false
	}

	// filter out tokens from samples we don't care about
	switch {
	case sample.Build < filters.MinBuild, sample.Build > filters.MaxBuild:
		return false
	case len(filters.Program) > 0 && !slices.Contains(filters.Program, sample.Program):
		return false
	case len(filters.OS) > 0 && !slices.Contains(filters.OS, sample.OS):
		return false
	case len(filters.Arch) > 0 && !slices.Contains(filters.Arch, sample.Arch):
		return false
	}

	// one matching name is enough to present the whole token once
	for _, candidate := range token.Names {
		if token_query.token_regexp.MatchString(candidate.Name) {
			token_query.present_token(token)
			return false
		}
	}
	return false
}
// load_sample_database reads all sample records from db/samples.parquet and
// stores them in token_query.sample_database keyed by sample ID. The map is
// replaced with a fresh one before reading, so it is left empty when the file
// cannot be read. When several records share an ID the last one wins.
func (token_query *token_query) load_sample_database() (err error) {
	by_id := make(map[string]db.Sample)
	token_query.sample_database = by_id

	records, err := parquet.ReadFile[db.Sample]("db/samples.parquet")
	if err != nil {
		return err
	}

	for i := range records {
		by_id[records[i].ID] = records[i]
	}
	return nil
}
// Query searches db/tokens.parquet for tokens that pass the filters in params
// and have a name matching params.Token, printing each match to stdout.
//
// The sample database (db/samples.parquet) is loaded first so that every token
// can be resolved to its sample. An invalid token pattern, a missing database
// file, or a read error is reported through app.Fatal.
func Query(params *QueryParams) {
	var query token_query
	query.params = params

	// The pattern is caller-supplied, so a malformed expression is reported
	// through the regular fatal-error path instead of panicking.
	token_regexp, err := regexp.CompilePOSIX(params.Token)
	if err != nil {
		app.Fatal(err)
		return
	}
	query.token_regexp = token_regexp

	if err := query.load_sample_database(); err != nil {
		app.Fatal(err)
		return
	}

	tokens_db_file, err := os.Open("db/tokens.parquet")
	if err != nil {
		app.Fatal(err)
		return
	}
	defer tokens_db_file.Close()

	reader := parquet.NewGenericReader[db.Token](tokens_db_file)
	// deferred after the file, so the reader is closed before the file
	defer reader.Close()

	rows := make([]db.Token, 1024)
	for {
		n, err := reader.Read(rows)

		// Read can return the final rows together with io.EOF, so the batch
		// must be consumed before the error is inspected; otherwise the tail
		// of the database would never be searched.
		for i := range rows[:n] {
			if query.match_token(&rows[i]) {
				return
			}
		}

		if errors.Is(err, io.EOF) {
			return
		}
		if err != nil {
			// anything other than end-of-file means the results are incomplete
			app.Fatal(err)
			return
		}
	}
}