package util import ( "encoding/json" "fmt" "os" "regexp" "slices" "github.com/parquet-go/parquet-go" "github.com/thunderbrewhq/binana/go/app" "github.com/thunderbrewhq/binana/go/db" ) type QueryPresentationMode uint8 const ( PresentQueryNormal QueryPresentationMode = iota PresentQueryNameOnly ) type QueryParams struct { // Present QueryPresentationMode // Match pattern for profile Profile string // Possible values for Program Program []string // Possible values for OS OS []string // Possible values for arch Arch []string // Range of builds to return information for MinBuild uint32 MaxBuild uint32 // Regular expression for tokens (symbols/type information) Token string } type token_query struct { params *QueryParams sample_database map[string]db.Sample token_regexp *regexp.Regexp } func (token_query *token_query) present_token(token *db.Token) { if token_query.params.Present == PresentQueryNameOnly { for _, name := range token.Names { if token_query.token_regexp.MatchString(name.Name) { fmt.Println(name.Name) } } return } kind_name := "" switch token.Kind { case db.OriginalConstantToken: kind_name = "original constant" case db.OriginalDatatypeToken: kind_name = "original datatype" case db.OriginalStringToken: kind_name = "original string" case db.OriginalSymbolToken: kind_name = "original symbol" default: return } fmt.Printf("%s in sample: '%s' section: '%s'", kind_name, token.Source[:8], token.Section) if token.Offset != "" { fmt.Printf(" at %s", token.Offset) } if token.Datatype != "" { fmt.Printf(" with datatype: '%s'", token.Datatype) } fmt.Printf("\n") fmt.Printf("names:\n") for _, name := range token.Names { name_kind_name := "" switch name.Kind { case db.OriginalName: name_kind_name = "original name" case db.DemangledName: name_kind_name = "demangled name" case db.BinanaizedName: name_kind_name = "binanaized name" default: panic(name.Kind) } fmt.Printf("%s '%s'\n", name_kind_name, name.Name) } fmt.Printf("--\n\n") } // attempt to match token and report to stdout // if returns quit = true, the search is halted func (token_query *token_query) match_token(token *db.Token) (quit bool) { matched := false sample, ok := token_query.sample_database[token.Source] if !ok { m, err := json.Marshal(token) if err != nil { panic(err) } fmt.Fprintln(os.Stderr, string(m)) app.Fatal(fmt.Errorf("a token references a sample (%s) that does not exist in the sample database. please fix your database", token.Source)) return } // filter out tokens from samples we don't care about if sample.Build < token_query.params.MinBuild || sample.Build > token_query.params.MaxBuild { return } if len(token_query.params.Program) > 0 { if !slices.Contains(token_query.params.Program, sample.Program) { return } } if len(token_query.params.OS) > 0 { if !slices.Contains(token_query.params.OS, sample.OS) { return } } if len(token_query.params.Arch) > 0 { if !slices.Contains(token_query.params.Arch, sample.Arch) { return } } for _, name := range token.Names { if token_query.token_regexp.MatchString(name.Name) { matched = true break } } if matched { token_query.present_token(token) } return } func (token_query *token_query) load_sample_database() (err error) { token_query.sample_database = make(map[string]db.Sample) var samples []db.Sample samples, err = parquet.ReadFile[db.Sample]("db/samples.parquet") if err != nil { return } for _, sample := range samples { token_query.sample_database[sample.ID] = sample } return } func Query(params *QueryParams) { var token_query token_query token_query.params = params token_query.token_regexp = regexp.MustCompilePOSIX(token_query.params.Token) if err := token_query.load_sample_database(); err != nil { app.Fatal(err) } tokens_db_file, err := os.Open("db/tokens.parquet") if err != nil { app.Fatal(err) } rows := make([]db.Token, 1024) reader := parquet.NewGenericReader[db.Token](tokens_db_file) read_loop: for { n, err := reader.Read(rows) if err != nil { break } for _, token := range rows[:n] { if token_query.match_token(&token) { break read_loop } } } reader.Close() tokens_db_file.Close() }