binana/go/app/util/query.go

236 lines
5.5 KiB
Go
Raw Normal View History

2026-03-20 01:58:16 -04:00
package util
import (
"encoding/json"
"fmt"
"os"
"regexp"
"slices"
"strconv"
2026-03-20 01:58:16 -04:00
"github.com/parquet-go/parquet-go"
"github.com/thunderbrewhq/binana/go/app"
"github.com/thunderbrewhq/binana/go/db"
)
type QueryPresentationMode uint8
const (
PresentQueryNormal QueryPresentationMode = iota
PresentQueryNameOnly
PresentQuerySampleName
2026-03-20 01:58:16 -04:00
)
type QueryParams struct {
//
Present QueryPresentationMode
// If true, presents the quoted version of the token name
Quote bool
2026-03-20 01:58:16 -04:00
// Match pattern for profile
Profile string
// Possible values for Program
Program []string
// Possible values for OS
OS []string
// Possible values for arch
Arch []string
// Range of builds to return information for
MinBuild uint32
MaxBuild uint32
// Regular expression for tokens (symbols/type information)
Token string
// If true, Token is a POSIX regular expression
RegEx bool
2026-03-20 01:58:16 -04:00
}
type token_query struct {
params *QueryParams
sample_database map[string]db.Sample
token_regexp *regexp.Regexp
}
func format_sample_name(sample db.Sample) (name string) {
var ext string
switch sample.MimeType {
case "application/x-ms-pdb":
ext = "pdb"
case "application/vnd.microsoft.portable-executable":
ext = "exe"
case "application/x-executable":
ext = "elf"
case "application/x-mach-binary":
ext = "macho"
default:
ext = "bin"
}
name = fmt.Sprintf("%s-%s-%d-%s-%s.%s", sample.Program, sample.Version, sample.Build, sample.OS, sample.Arch, ext)
return
}
2026-03-20 01:58:16 -04:00
func (token_query *token_query) present_token(token *db.Token) {
sample, ok := token_query.sample_database[token.Source]
if !ok {
panic(token.Source)
}
name_wrap := func(s string) string {
return s
}
if token_query.params.Quote {
name_wrap = strconv.Quote
}
2026-03-20 01:58:16 -04:00
if token_query.params.Present == PresentQueryNameOnly {
for _, name := range token.Names {
if token_query.token_regexp.MatchString(name.Name) {
fmt.Println(name_wrap(name.Name))
}
}
return
} else if token_query.params.Present == PresentQuerySampleName {
sample_name := format_sample_name(sample)
for _, name := range token.Names {
if token_query.token_regexp.MatchString(name.Name) {
fmt.Println(sample_name, name_wrap(name.Name))
2026-03-20 01:58:16 -04:00
}
}
return
}
kind_name := ""
switch token.Kind {
case db.OriginalConstantToken:
kind_name = "original constant"
case db.OriginalDatatypeToken:
kind_name = "original datatype"
case db.OriginalStringToken:
kind_name = "original string"
case db.OriginalSymbolToken:
kind_name = "original symbol"
default:
return
}
fmt.Printf("%s in sample: ('%s', %s) section: '%s'", kind_name, token.Source[:8], format_sample_name(sample), token.Section)
2026-03-20 01:58:16 -04:00
if token.Offset != "" {
fmt.Printf(" at %s", token.Offset)
}
if token.Datatype != "" {
fmt.Printf(" with datatype: '%s'", token.Datatype)
}
fmt.Printf("\n")
fmt.Printf("names:\n")
for _, name := range token.Names {
name_kind_name := ""
switch name.Kind {
case db.OriginalName:
name_kind_name = "original name"
case db.DemangledName:
name_kind_name = "demangled name"
case db.BinanaizedName:
name_kind_name = "binanaized name"
default:
panic(name.Kind)
}
fmt.Printf("%s: %s\n", name_kind_name, name_wrap(name.Name))
2026-03-20 01:58:16 -04:00
}
fmt.Printf("--\n\n")
}
// attempt to match token and report to stdout
// if returns quit = true, the search is halted
func (token_query *token_query) match_token(token *db.Token) (quit bool) {
matched := false
sample, ok := token_query.sample_database[token.Source]
if !ok {
m, err := json.Marshal(token)
if err != nil {
panic(err)
}
fmt.Fprintln(os.Stderr, string(m))
app.Fatal(fmt.Errorf("a token references a sample (%s) that does not exist in the sample database. please fix your database", token.Source))
return
}
// filter out tokens from samples we don't care about
if sample.Build < token_query.params.MinBuild || sample.Build > token_query.params.MaxBuild {
return
}
if len(token_query.params.Program) > 0 {
if !slices.Contains(token_query.params.Program, sample.Program) {
return
}
}
if len(token_query.params.OS) > 0 {
if !slices.Contains(token_query.params.OS, sample.OS) {
return
}
}
if len(token_query.params.Arch) > 0 {
if !slices.Contains(token_query.params.Arch, sample.Arch) {
return
}
}
for _, name := range token.Names {
if token_query.token_regexp.MatchString(name.Name) {
matched = true
break
}
}
if matched {
token_query.present_token(token)
}
return
}
func (token_query *token_query) load_sample_database() (err error) {
token_query.sample_database = make(map[string]db.Sample)
var samples []db.Sample
samples, err = parquet.ReadFile[db.Sample]("db/samples.parquet")
if err != nil {
return
}
for _, sample := range samples {
token_query.sample_database[sample.ID] = sample
}
return
}
func Query(params *QueryParams) {
var token_query token_query
token_query.params = params
if params.RegEx {
token_query.token_regexp = regexp.MustCompilePOSIX(token_query.params.Token)
} else {
token_query.token_regexp = regexp.MustCompilePOSIX(regexp.QuoteMeta(params.Token))
}
2026-03-20 01:58:16 -04:00
if err := token_query.load_sample_database(); err != nil {
app.Fatal(err)
}
tokens_db_file, err := os.Open("db/tokens.parquet")
if err != nil {
app.Fatal(err)
}
rows := make([]db.Token, 1024)
reader := parquet.NewGenericReader[db.Token](tokens_db_file)
read_loop:
for {
n, err := reader.Read(rows)
if err != nil {
break
}
for _, token := range rows[:n] {
if token_query.match_token(&token) {
break read_loop
}
}
}
reader.Close()
tokens_db_file.Close()
}