mirror of
https://github.com/thunderbrewhq/binana.git
synced 2026-03-22 22:00:13 +00:00
This commit is contained in:
parent
ac268a16c8
commit
2c2815ab0b
22 changed files with 2122 additions and 2 deletions
14
go/app/util/dbutil/format.go
Normal file
14
go/app/util/dbutil/format.go
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
package dbutil
|
||||
|
||||
import "errors"
|
||||
|
||||
// DatabaseFormat selects the on-disk encoding used by a database file.
type DatabaseFormat uint8

const (
	// DatabaseParquet writes Apache Parquet output (the zero value).
	DatabaseParquet DatabaseFormat = iota
	// DatabaseJSON writes newline-delimited JSON output.
	DatabaseJSON
)

var (
	// ErrUnknownDatabaseFormat is returned when a DatabaseFormat value
	// is not one of the constants declared above.
	ErrUnknownDatabaseFormat = errors.New("dbutil: unknown database format")
)
|
||||
80
go/app/util/dbutil/writer.go
Normal file
80
go/app/util/dbutil/writer.go
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
package dbutil
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/parquet-go/parquet-go"
|
||||
)
|
||||
|
||||
// Writer writes entries of type T to a database file. The concrete
// encoding (JSON or Parquet) is captured in the write/close closures
// installed by Open.
type Writer[T any] struct {
	// write encodes one batch of entries to the underlying output.
	write func([]T) (err error)
	// close flushes any buffered state and closes the output.
	close func() (err error)
}
|
||||
|
||||
func (writer *Writer[T]) WriteEntries(entries []T) (err error) {
|
||||
err = writer.write(entries)
|
||||
return
|
||||
}
|
||||
|
||||
func (writer *Writer[T]) Close() (err error) {
|
||||
err = writer.close()
|
||||
return
|
||||
}
|
||||
|
||||
func Open[T any](name string, format DatabaseFormat) (writer *Writer[T], err error) {
|
||||
writer = new(Writer[T])
|
||||
var (
|
||||
file *os.File
|
||||
output io.Writer
|
||||
)
|
||||
if name == "" {
|
||||
output = os.Stdout
|
||||
} else {
|
||||
file, err = os.Create(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
output = file
|
||||
}
|
||||
switch format {
|
||||
|
||||
case DatabaseJSON:
|
||||
encoder := json.NewEncoder(output)
|
||||
writer.write = func(entries []T) (err error) {
|
||||
for _, entry := range entries {
|
||||
if err = encoder.Encode(&entry); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
writer.close = func() (err error) {
|
||||
if file != nil {
|
||||
err = file.Close()
|
||||
}
|
||||
return
|
||||
}
|
||||
case DatabaseParquet:
|
||||
generic_writer := parquet.NewGenericWriter[T](output)
|
||||
writer.write = func(entries []T) (err error) {
|
||||
_, err = generic_writer.Write(entries)
|
||||
return
|
||||
}
|
||||
writer.close = func() (err error) {
|
||||
if err = generic_writer.Close(); err != nil {
|
||||
return
|
||||
}
|
||||
if file != nil {
|
||||
err = file.Close()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
default:
|
||||
err = fmt.Errorf("%w: %d", ErrUnknownDatabaseFormat, format)
|
||||
}
|
||||
return
|
||||
}
|
||||
49
go/app/util/demangle.go
Normal file
49
go/app/util/demangle.go
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// demangle_cache memoizes successful demangle results keyed by the
// mangled input, avoiding repeated invocations of the external tool.
var demangle_cache = make(map[string]string)

// demangle runs the external "demumble" tool on str and returns the
// demangled name. Successful results are cached. An error is returned
// when the tool cannot run, produces no output, or leaves the input
// unchanged (i.e. str was not actually mangled).
func demangle(str string) (demangled string, err error) {
	var ok bool
	if demangled, ok = demangle_cache[str]; ok {
		return
	}

	var output bytes.Buffer
	c := exec.Command("demumble", str)
	c.Stdout = &output
	// Fix: capture Run's error instead of silently discarding it, so
	// a missing/broken demumble binary is reported with context.
	run_err := c.Run()
	if output.Len() == 0 {
		if run_err != nil {
			err = fmt.Errorf("cannot demangle: %w", run_err)
			return
		}
		err = fmt.Errorf("cannot demangle")
		return
	}
	demangled = strings.TrimSuffix(output.String(), "\n")
	// demumble echoes its input when it cannot demangle it
	if str == demangled {
		err = fmt.Errorf("cannot demangle")
		return
	}
	demangle_cache[str] = demangled
	return
}
|
||||
|
||||
// looks_mangled reports whether str resembles a compiler-mangled
// identifier: MSVC ("?", "."), Itanium ("_Z"), a leading digit, or a
// space-free name containing digits.
func looks_mangled(str string) bool {
	if str == "" {
		return false
	}
	switch {
	case strings.HasPrefix(str, "."),
		strings.HasPrefix(str, "?"),
		strings.HasPrefix(str, "_Z"):
		return true
	case str[0] >= '0' && str[0] <= '9':
		return true
	case strings.ContainsAny(str, "0123456789") && !strings.Contains(str, " "):
		return true
	default:
		return false
	}
}
|
||||
31
go/app/util/exe.go
Normal file
31
go/app/util/exe.go
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"debug/pe"
|
||||
"os"
|
||||
)
|
||||
|
||||
// get_exe_base_address reads the preferred image base address from a
// PE executable's optional header. When the optional header kind is
// unrecognized, the conventional default 0x400000 is returned.
func get_exe_base_address(name string) (base_address uint64, err error) {
	var file *os.File
	file, err = os.Open(name)
	if err != nil {
		return
	}
	// Fix: close the file on every path; the original leaked it when
	// pe.NewFile failed.
	defer file.Close()

	var pe_file *pe.File
	pe_file, err = pe.NewFile(file)
	if err != nil {
		return
	}

	// default image base if the optional header is missing/unknown
	base_address = uint64(0x400000)

	switch h := pe_file.OptionalHeader.(type) {
	case *pe.OptionalHeader32:
		base_address = uint64(h.ImageBase)
	case *pe.OptionalHeader64:
		base_address = h.ImageBase
	}
	return
}
|
||||
19
go/app/util/hash.go
Normal file
19
go/app/util/hash.go
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"os"
|
||||
)
|
||||
|
||||
func hash_file(name string) (id string, err error) {
|
||||
var b []byte
|
||||
b, err = os.ReadFile(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
h := sha256.New()
|
||||
h.Write(b[:])
|
||||
id = hex.EncodeToString(h.Sum(nil))
|
||||
return
|
||||
}
|
||||
224
go/app/util/make-samples.go
Normal file
224
go/app/util/make-samples.go
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/thunderbrewhq/binana/go/app"
|
||||
"github.com/thunderbrewhq/binana/go/app/util/dbutil"
|
||||
"github.com/thunderbrewhq/binana/go/db"
|
||||
)
|
||||
|
||||
// MakeSampleDatabaseParams configures MakeSampleDatabase.
type MakeSampleDatabaseParams struct {
	// A file name that corresponds to a tree of sample files.
	// Anything in this tree will be collected into the sample database.
	Source string

	// The name of the file to write the database to.
	Output string

	// Sets the format of the database file.
	Format dbutil.DatabaseFormat

	// URLs that map to the root of the sample tree hierarchy.
	// Used to generate a list of mirror URLs for sample binaries.
	DirectMirrors []string

	// List of IPFS gateway URLs.
	// If not empty, a CID for the sample tree will be created.
	// Actually uploading anything in the sample tree, however,
	// is outside the scope of this tool.
	IPFSGateways []string
}
|
||||
|
||||
// sample_database accumulates db.Sample entries in memory and flushes
// them through a dbutil.Writer on Close.
type sample_database struct {
	writer *dbutil.Writer[db.Sample]
	// root CID of the sample tree; empty unless IPFS gateways are used
	ipfs_tree_cid string
	// entries buffered until Close
	buffer []db.Sample
}
|
||||
|
||||
func (sample_database *sample_database) add(sample db.Sample) (err error) {
|
||||
sample_database.buffer = append(sample_database.buffer, sample)
|
||||
return
|
||||
}
|
||||
|
||||
func (sample_database *sample_database) Close() (err error) {
|
||||
if err = sample_database.writer.WriteEntries(sample_database.buffer); err != nil {
|
||||
return
|
||||
}
|
||||
err = sample_database.writer.Close()
|
||||
return
|
||||
}
|
||||
|
||||
func (sample_database *sample_database) make_sample_file(params *MakeSampleDatabaseParams, name, relative_name string) (err error) {
|
||||
var sample db.Sample
|
||||
// infer mime-type from extension
|
||||
switch filepath.Ext(name) {
|
||||
case ".exe":
|
||||
sample.MimeType = "application/vnd.microsoft.portable-executable"
|
||||
case ".pdb":
|
||||
sample.MimeType = "application/x-ms-pdb"
|
||||
// associate the PDB with its EXE
|
||||
sample_exe_name := strings.TrimSuffix(name, ".pdb") + ".exe"
|
||||
if _, err = os.Stat(sample_exe_name); err == nil {
|
||||
sample.Executable, err = hash_file(sample_exe_name)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
case ".macho":
|
||||
sample.MimeType = "application/x-mach-binary"
|
||||
case ".elf":
|
||||
sample.MimeType = "application/x-executable"
|
||||
default:
|
||||
// don't care about this
|
||||
return
|
||||
}
|
||||
|
||||
sample.ID, err = hash_file(name)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// get the base filename
|
||||
base_name := filepath.Base(name)
|
||||
|
||||
// split the base filename without its extension
|
||||
filename_components := strings.Split(strings.TrimSuffix(base_name, filepath.Ext(base_name)), "-")
|
||||
// now, parse the filename (these must be correctly named!)
|
||||
sample.Program = filename_components[0]
|
||||
sample.Version = filename_components[1]
|
||||
var build uint64
|
||||
build, err = strconv.ParseUint(filename_components[2], 0, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
sample.Build = uint32(build)
|
||||
sample.OS = filename_components[3]
|
||||
sample.Arch = filename_components[4]
|
||||
|
||||
// now, create various mirrors
|
||||
for _, direct_mirror := range params.DirectMirrors {
|
||||
sample.Mirrors = append(sample.Mirrors, db.SampleMirror{
|
||||
Kind: db.MirrorDirect,
|
||||
URL: direct_mirror + relative_name,
|
||||
})
|
||||
}
|
||||
for _, ipfs_gateway := range params.IPFSGateways {
|
||||
sample.Mirrors = append(sample.Mirrors, db.SampleMirror{
|
||||
Kind: db.MirrorIPFS,
|
||||
URL: ipfs_gateway + "/" + sample_database.ipfs_tree_cid + relative_name,
|
||||
})
|
||||
}
|
||||
|
||||
// now write the sample
|
||||
|
||||
if err = sample_database.add(sample); err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (sample_database *sample_database) make_tree(params *MakeSampleDatabaseParams, name, relative_name string) (err error) {
|
||||
var (
|
||||
tree_entries []os.DirEntry
|
||||
)
|
||||
|
||||
tree_entries, err = os.ReadDir(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, tree_entry := range tree_entries {
|
||||
if tree_entry.IsDir() {
|
||||
if err = sample_database.make_tree(params, name+"/"+tree_entry.Name(), relative_name+"/"+tree_entry.Name()); err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
if err = sample_database.make_sample_file(params, name+"/"+tree_entry.Name(), relative_name+"/"+tree_entry.Name()); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// ipfs_generate_file_cid computes the IPFS CID of the tree rooted at
// name, without uploading anything, by running
//
//	ipfs add -qr --only-hash .
//
// inside the tree; the last CID printed is the root of the tree.
func ipfs_generate_file_cid(name string) (cid string, err error) {
	var wd string
	if wd, err = os.Getwd(); err != nil {
		return
	}
	if err = os.Chdir(name); err != nil {
		return
	}
	// restore the working directory on every exit path
	defer os.Chdir(wd)

	command := exec.Command("ipfs", "add", "-qr", "--only-hash", ".")
	var command_output bytes.Buffer
	command.Stdout = &command_output
	// Fix: check Run's error. The original ignored it and then
	// dereferenced command.ProcessState, which is nil when the ipfs
	// binary could not be started at all.
	if run_err := command.Run(); run_err != nil {
		if command.ProcessState != nil {
			err = fmt.Errorf("util: ipfs tool exited: %d", command.ProcessState.ExitCode())
		} else {
			err = fmt.Errorf("util: cannot run ipfs tool: %w", run_err)
		}
		return
	}

	// parse command output: the last printed line is the root CID
	command_output_scanner := bufio.NewScanner(&command_output)
	for command_output_scanner.Scan() {
		cid = command_output_scanner.Text()
	}
	return
}
|
||||
|
||||
func MakeSampleDatabase(params *MakeSampleDatabaseParams) {
|
||||
var (
|
||||
err error
|
||||
sample_database sample_database
|
||||
)
|
||||
|
||||
// if we want to generate IPFS links, start by getting the CID for the sample tree
|
||||
if len(params.IPFSGateways) != 0 {
|
||||
sample_database.ipfs_tree_cid, err = ipfs_generate_file_cid(params.Source)
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
sample_database.writer, err = dbutil.Open[db.Sample](params.Output, params.Format)
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
// make the root tree, with our params, the source as the first tree, and "" (root) as the relative path
|
||||
if err = sample_database.make_tree(params, params.Source, ""); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
if err = sample_database.Close(); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
}
|
||||
303
go/app/util/make-tokens.go
Normal file
303
go/app/util/make-tokens.go
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"debug/macho"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/thunderbrewhq/binana/go/app"
|
||||
"github.com/thunderbrewhq/binana/go/app/util/dbutil"
|
||||
"github.com/thunderbrewhq/binana/go/db"
|
||||
"github.com/thunderbrewhq/binana/go/pdbconv"
|
||||
"github.com/thunderbrewhq/binana/go/stringrecovery"
|
||||
)
|
||||
|
||||
// MakeTokenDatabaseParams configures MakeTokenDatabase.
type MakeTokenDatabaseParams struct {
	// Source is a file or directory tree of binaries to extract tokens from.
	Source string
	// Output is the file to write the token database to.
	Output string
	// Format sets the database file encoding.
	Format dbutil.DatabaseFormat
}
|
||||
|
||||
func MakeTokenDatabase(params *MakeTokenDatabaseParams) {
|
||||
var (
|
||||
tokens_database tokens_database
|
||||
err error
|
||||
)
|
||||
if err = tokens_database.Open(params.Output, params.Format); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
if err = tokens_database.make(params.Source); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
if err = tokens_database.Close(); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// tokens_database writes db.Token entries and hands out sequential
// token IDs.
type tokens_database struct {
	// next token ID to assign; starts at 1 (set in Open)
	sequence uint64
	writer *dbutil.Writer[db.Token]
}
|
||||
|
||||
func (tokens_database *tokens_database) next_token_id() (id uint64) {
|
||||
id = tokens_database.sequence
|
||||
tokens_database.sequence++
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) Open(name string, format dbutil.DatabaseFormat) (err error) {
|
||||
tokens_database.sequence = 1
|
||||
tokens_database.writer, err = dbutil.Open[db.Token](name, format)
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) Close() (err error) {
|
||||
err = tokens_database.writer.Close()
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) Write(token *db.Token) (err error) {
|
||||
tokens := []db.Token{*token}
|
||||
if err = tokens_database.writer.WriteEntries(tokens); err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_file_pdb(name string) (err error) {
|
||||
exe_name := strings.TrimSuffix(name, ".pdb") + ".exe"
|
||||
var base_address uint64
|
||||
base_address, err = get_exe_base_address(exe_name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintln(os.Stderr, "[pdb]", name)
|
||||
var source_id string
|
||||
source_id, err = hash_file(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
fmt.Fprintln(os.Stderr, "[pdb]", source_id)
|
||||
// check for the existence of an alternate, .pdb.json.gz file
|
||||
_, err = os.Stat(name + ".json.gz")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
gzip_file *os.File
|
||||
gzip_reader *gzip.Reader
|
||||
)
|
||||
gzip_file, err = os.Open(name + ".json.gz")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var pdb pdbconv.ProgramDatabase
|
||||
gzip_reader, err = gzip.NewReader(gzip_file)
|
||||
json_decoder := json.NewDecoder(gzip_reader)
|
||||
|
||||
if err = json_decoder.Decode(&pdb); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
gzip_file.Close()
|
||||
|
||||
var v pdb_token_visitor
|
||||
v.init(tokens_database, source_id, base_address)
|
||||
if err = v.visit_all(&pdb); err != nil {
|
||||
return
|
||||
}
|
||||
if err = v.write_tokens(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) write_string_token(source_id string, section_name string, address uint64, str string) (err error) {
|
||||
var db_token db.Token
|
||||
db_token.ID = tokens_database.next_token_id()
|
||||
db_token.Source = source_id
|
||||
db_token.Section = section_name
|
||||
db_token.Kind = db.OriginalStringToken
|
||||
db_token.Offset = fmt.Sprintf("%X", address)
|
||||
|
||||
db_token.Names = append(db_token.Names, db.TokenName{db.OriginalName, str})
|
||||
|
||||
// detect if this is a mangled type identifier
|
||||
if looks_mangled(str) {
|
||||
demangled, err := demangle(str)
|
||||
if err == nil {
|
||||
db_token.Names = append(db_token.Names, db.TokenName{db.DemangledName, demangled})
|
||||
}
|
||||
}
|
||||
|
||||
err = tokens_database.Write(&db_token)
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_file_pe(name string) (err error) {
|
||||
var id string
|
||||
id, err = hash_file(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) {
|
||||
fmt.Fprintf(os.Stderr, "[pe] string found: %s %08X %s\n", section_name, address, str)
|
||||
tokens_database.write_string_token(id, section_name, address, str)
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_file_macho(name string) (err error) {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", name)
|
||||
var source_id string
|
||||
source_id, err = hash_file(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", source_id)
|
||||
var (
|
||||
file *os.File
|
||||
macho_file *macho.File
|
||||
)
|
||||
file, err = os.Open(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
macho_file, err = macho.NewFile(file)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_, dwarf_err := macho_file.DWARF()
|
||||
if dwarf_err == nil {
|
||||
fmt.Fprintln(os.Stderr, "DWARF!")
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "cpu", macho_file.FileHeader.Cpu)
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "loads:")
|
||||
// for _, load := range macho_file.Loads {
|
||||
// fmt.Fprintln(os.Stderr, "[mach-o]", load.String())
|
||||
// }
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "sections:")
|
||||
for _, section := range macho_file.Sections {
|
||||
fmt.Fprintln(os.Stderr, "section", section.SectionHeader.Name)
|
||||
}
|
||||
|
||||
if macho_file.Dysymtab != nil {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a dysymtab")
|
||||
} else {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "dysymtab:")
|
||||
}
|
||||
|
||||
var imported_symbols []string
|
||||
imported_symbols, err = macho_file.ImportedSymbols()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if macho_file.Symtab == nil {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a symtab")
|
||||
} else {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "symtab:")
|
||||
for _, sym := range macho_file.Symtab.Syms {
|
||||
imported := slices.Contains(imported_symbols, sym.Name)
|
||||
var section_name string
|
||||
if sym.Sect != 0 {
|
||||
section_name = macho_file.Sections[sym.Sect-1].SectionHeader.Name
|
||||
}
|
||||
if imported {
|
||||
fmt.Fprintf(os.Stderr, "[mach-o] imported %s %02x %s\n", section_name, sym.Type, sym.Name)
|
||||
} else {
|
||||
if sym.Name == "" {
|
||||
// fmt.Fprintln(os.Stderr, "[mach-o]", "symbol has no name", "sect="+section_name, sym.Type, sym.Value, sym.Desc)
|
||||
} else {
|
||||
fmt.Fprintf(os.Stderr, "[mach-o] internal %s %02x %s\n", section_name, sym.Type, sym.Name)
|
||||
var token db.Token
|
||||
token.ID = tokens_database.next_token_id()
|
||||
token.Source = source_id
|
||||
token.Kind = db.OriginalSymbolToken
|
||||
token.Section = section_name
|
||||
token.Offset = fmt.Sprintf("%X", sym.Value)
|
||||
token.Names = append(token.Names, db.TokenName{db.OriginalName, sym.Name})
|
||||
|
||||
if looks_mangled(sym.Name) {
|
||||
demangled, err := demangle(sym.Name)
|
||||
if err == nil {
|
||||
token.Names = append(token.Names, db.TokenName{db.DemangledName, demangled})
|
||||
}
|
||||
}
|
||||
|
||||
tokens_database.Write(&token)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
file.Close()
|
||||
|
||||
if err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) {
|
||||
fmt.Fprintf(os.Stderr, "[mach-o] string found: %s %08X %s\n", section_name, address, str)
|
||||
tokens_database.write_string_token(source_id, section_name, address, str)
|
||||
}); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_file(name string) (err error) {
|
||||
switch filepath.Ext(name) {
|
||||
case ".macho":
|
||||
err = tokens_database.make_file_macho(name)
|
||||
case ".pdb":
|
||||
err = tokens_database.make_file_pdb(name)
|
||||
case ".exe":
|
||||
err = tokens_database.make_file_pe(name)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_directory(name string) (err error) {
|
||||
var entries []os.DirEntry
|
||||
entries, err = os.ReadDir(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
if err = tokens_database.make_directory(filepath.Join(name, entry.Name())); err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
if err = tokens_database.make_file(filepath.Join(name, entry.Name())); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make(name string) (err error) {
|
||||
var fi os.FileInfo
|
||||
fi, err = os.Stat(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if fi.IsDir() {
|
||||
err = tokens_database.make_directory(name)
|
||||
} else {
|
||||
err = tokens_database.make_file(name)
|
||||
}
|
||||
return
|
||||
}
|
||||
474
go/app/util/pdb.go
Normal file
474
go/app/util/pdb.go
Normal file
|
|
@ -0,0 +1,474 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"maps"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"github.com/thunderbrewhq/binana/go/db"
|
||||
"github.com/thunderbrewhq/binana/go/pdbconv"
|
||||
)
|
||||
|
||||
// pdb_token_visitor walks a pdbconv.ProgramDatabase and accumulates
// deduplicated db.Token entries before they are written out.
type pdb_token_visitor struct {
	// destination database; also provides token ID allocation
	tokens_database *tokens_database

	// location of the base module; symbol addresses are rebased onto it
	base_address uint64
	// content hash of the source PDB, recorded on every token
	pdb_source_id string
	// maps a string literal to its token
	strings map[string]*db.Token
	// maps a datatype name (or, for enums, a content hash) to its token
	datatypes map[string]*db.Token
	// maps a symbol to a token, keyed by rebased address
	symbols map[uint64]*db.Token

	// constant symbols have no address and are collected separately
	constants []db.Token
}
|
||||
|
||||
func (v *pdb_token_visitor) init(tokens_database *tokens_database, pdb_source_id string, base_address uint64) {
|
||||
v.base_address = base_address
|
||||
v.pdb_source_id = pdb_source_id
|
||||
v.strings = make(map[string]*db.Token)
|
||||
v.datatypes = make(map[string]*db.Token)
|
||||
// map of address to token
|
||||
v.symbols = make(map[uint64]*db.Token)
|
||||
v.tokens_database = tokens_database
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_class(class *pdbconv.Class) (err error) {
|
||||
token, ok := v.datatypes[class.Name]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.datatypes[class.Name] = token
|
||||
}
|
||||
|
||||
// set token source to pdb
|
||||
token.Source = v.pdb_source_id
|
||||
// kind is debug information token
|
||||
token.Kind = db.OriginalDatatypeToken
|
||||
|
||||
// set original name
|
||||
token.Names = append(token.Names, db.TokenName{db.OriginalName, class.Name})
|
||||
|
||||
// set the basic type
|
||||
token.Keyword = "class"
|
||||
|
||||
for _, member := range class.Members {
|
||||
var token_member db.TokenMember
|
||||
if member.Kind == "Member" {
|
||||
token_member.Kind = db.FieldMember
|
||||
} else if member.Kind == "Unknown" && member.Datatype == "void *" {
|
||||
token_member.Kind = db.MethodMember
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
token_member.Key = member.Name
|
||||
token_member.Value = member.Datatype
|
||||
|
||||
if !slices.Contains(token.Members, token_member) {
|
||||
token.Members = append(token.Members, token_member)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_datatype(datatype *pdbconv.Datatype) (err error) {
|
||||
token, ok := v.datatypes[datatype.Name]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.datatypes[datatype.Name] = token
|
||||
}
|
||||
|
||||
// set token source to pdb
|
||||
token.Source = v.pdb_source_id
|
||||
// kind is debug information token
|
||||
token.Kind = db.OriginalDatatypeToken
|
||||
|
||||
// set original name
|
||||
token.Names = append(token.Names, db.TokenName{db.OriginalName, datatype.Name})
|
||||
|
||||
// set the basic type
|
||||
if datatype.Kind == "Structure" {
|
||||
token.Keyword = "struct"
|
||||
} else if datatype.Kind == "Union" {
|
||||
token.Keyword = "union"
|
||||
} else {
|
||||
err = fmt.Errorf("unhandled datatype kind '%s'", datatype.Kind)
|
||||
return
|
||||
}
|
||||
|
||||
for _, member := range datatype.Members {
|
||||
var token_member db.TokenMember
|
||||
if member.Kind == "Member" {
|
||||
token_member.Kind = db.FieldMember
|
||||
} else if member.Kind == "Unknown" && member.Datatype == "void *" {
|
||||
token_member.Kind = db.MethodMember
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
token_member.Key = member.Name
|
||||
token_member.Value = member.Datatype
|
||||
|
||||
if !slices.Contains(token.Members, token_member) {
|
||||
token.Members = append(token.Members, token_member)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_function(function *pdbconv.Function) (err error) {
|
||||
var address uint64
|
||||
address, err = strconv.ParseUint(function.Address, 0, 64)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
address = v.base_address + address
|
||||
|
||||
token, ok := v.symbols[address]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.symbols[address] = token
|
||||
}
|
||||
|
||||
// set token source to pdb
|
||||
token.Source = v.pdb_source_id
|
||||
// kind is symbol information token
|
||||
token.Kind = db.OriginalSymbolToken
|
||||
// set address
|
||||
token.Offset = fmt.Sprintf("%X", address)
|
||||
// set original name
|
||||
token.Names = append(token.Names, db.TokenName{db.OriginalName, function.Name})
|
||||
|
||||
if looks_mangled(function.Name) {
|
||||
demangled, demangler_err := demangle(function.Name)
|
||||
if demangler_err == nil {
|
||||
token.Names = append(token.Names, db.TokenName{db.DemangledName, demangled})
|
||||
}
|
||||
}
|
||||
|
||||
// visit source files
|
||||
for _, line_number := range function.LineNumbers {
|
||||
if err = v.visit_string(line_number.SourceFile); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// classify stack variables as members
|
||||
for _, stack_variable := range function.StackVariables {
|
||||
var token_member db.TokenMember
|
||||
if stack_variable.Kind == "Parameter" || stack_variable.Kind == "ObjectPointer" {
|
||||
token_member.Kind = db.ParameterMember
|
||||
} else if stack_variable.Kind == "Local" {
|
||||
token_member.Kind = db.LocalMember
|
||||
} else if stack_variable.Kind == "StaticLocal" {
|
||||
token_member.Kind = db.StaticLocalMember
|
||||
} else if stack_variable.Kind == "Constant" {
|
||||
// these are repeated elsewhere
|
||||
continue
|
||||
} else {
|
||||
err = fmt.Errorf("unhandled stack variable kind '%s'", stack_variable.Kind)
|
||||
return
|
||||
}
|
||||
|
||||
token_member.Key = stack_variable.Name
|
||||
token_member.Value = stack_variable.Datatype
|
||||
|
||||
token.Members = append(token.Members, token_member)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_enum(enum *pdbconv.Enum) (err error) {
|
||||
// create hash of enum's contents and use to key the datatype
|
||||
h := sha256.New()
|
||||
h.Write([]byte(enum.Name))
|
||||
for _, member := range enum.Members {
|
||||
h.Write([]byte(member.Name))
|
||||
h.Write([]byte(fmt.Sprintf("%d", member.Value)))
|
||||
}
|
||||
name := hex.EncodeToString(h.Sum(nil))
|
||||
|
||||
token, ok := v.datatypes[name]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.datatypes[name] = token
|
||||
}
|
||||
|
||||
token.Source = v.pdb_source_id
|
||||
|
||||
token.Keyword = "enum"
|
||||
|
||||
// apply name (may be __unnamed)
|
||||
token.Names = append(token.Names, db.TokenName{db.OriginalName, enum.Name})
|
||||
|
||||
// this is an original datatype
|
||||
token.Kind = db.OriginalDatatypeToken
|
||||
|
||||
for _, member := range enum.Members {
|
||||
var token_member db.TokenMember
|
||||
token_member.Kind = db.EnumMember
|
||||
token_member.Key = member.Name
|
||||
token_member.Value = fmt.Sprintf("%d", member.Value)
|
||||
|
||||
if !slices.Contains(token.Members, token_member) {
|
||||
token.Members = append(token.Members, token_member)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_string(s string) (err error) {
|
||||
token, ok := v.strings[s]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.strings[s] = token
|
||||
}
|
||||
|
||||
// apply source
|
||||
token.Source = v.pdb_source_id
|
||||
|
||||
// this is a string token
|
||||
token.Kind = db.OriginalStringToken
|
||||
|
||||
// add name
|
||||
var token_name db.TokenName
|
||||
token_name.Kind = db.OriginalName
|
||||
token_name.Name = s
|
||||
token.Names = []db.TokenName{token_name}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_source_files_table(table *pdbconv.Table) (err error) {
|
||||
for _, source_file := range table.SourceFiles {
|
||||
if err = v.visit_string(source_file.Name); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_constant(symbol *pdbconv.TableSymbol) (err error) {
|
||||
var token db.Token
|
||||
token.ID = v.tokens_database.next_token_id()
|
||||
token.Source = v.pdb_source_id
|
||||
token.Keyword = "const"
|
||||
token.Datatype = symbol.Datatype
|
||||
|
||||
if symbol.Name != "" {
|
||||
var name db.TokenName
|
||||
name.Kind = db.OriginalName
|
||||
name.Name = symbol.Name
|
||||
token.Names = append(token.Names, name)
|
||||
}
|
||||
|
||||
if symbol.Undecorated != "" {
|
||||
var name db.TokenName
|
||||
name.Kind = db.OriginalName
|
||||
name.Name = symbol.Undecorated
|
||||
token.Names = append(token.Names, name)
|
||||
}
|
||||
|
||||
var value db.TokenMember
|
||||
value.Kind = db.ConstantValueMember
|
||||
value.Value = symbol.Value
|
||||
|
||||
token.Members = append(token.Members, value)
|
||||
|
||||
v.constants = append(v.constants, token)
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_table_symbol(symbol *pdbconv.TableSymbol) (err error) {
|
||||
if symbol.Address == "0x0" {
|
||||
if symbol.Value != "" && symbol.Kind == "Constant" {
|
||||
err = v.visit_constant(symbol)
|
||||
return
|
||||
}
|
||||
|
||||
// so, this does not correspond to an actual symbol.
|
||||
// we can still mine it for string tokens.
|
||||
if symbol.Name != "" {
|
||||
if err = v.visit_string(symbol.Name); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if symbol.Undecorated != "" {
|
||||
if err = v.visit_string(symbol.Name); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// this corresponds to an address
|
||||
// compute the real address
|
||||
var address uint64
|
||||
address, err = strconv.ParseUint(symbol.Address, 0, 64)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
address = v.base_address + address
|
||||
|
||||
token, ok := v.symbols[address]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.symbols[address] = token
|
||||
}
|
||||
token.Source = v.pdb_source_id
|
||||
|
||||
if symbol.Datatype != "" {
|
||||
token.Datatype = symbol.Datatype
|
||||
}
|
||||
|
||||
if symbol.Kind == "FileStatic" {
|
||||
token.Keyword = "static"
|
||||
} else if symbol.Kind == "Global" {
|
||||
token.Keyword = "global"
|
||||
}
|
||||
|
||||
if symbol.Name != "" {
|
||||
token_name := db.TokenName{db.OriginalName, symbol.Name}
|
||||
if !slices.Contains(token.Names, token_name) {
|
||||
token.Names = append(token.Names, token_name)
|
||||
}
|
||||
}
|
||||
|
||||
if symbol.Undecorated != "" {
|
||||
undecorated := db.TokenName{db.OriginalName, symbol.Undecorated}
|
||||
if !slices.Contains(token.Names, undecorated) {
|
||||
token.Names = append(token.Names, undecorated)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_symbols_table(table *pdbconv.Table) (err error) {
|
||||
for _, symbol := range table.Symbols {
|
||||
if err = v.visit_table_symbol(&symbol); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_table(table *pdbconv.Table) (err error) {
|
||||
if table.Name == "SourceFiles" {
|
||||
err = v.visit_source_files_table(table)
|
||||
} else if table.Name == "Symbols" {
|
||||
err = v.visit_symbols_table(table)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_typedef(typedef *pdbconv.Typedef) (err error) {
|
||||
token, ok := v.datatypes[typedef.Name]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.datatypes[typedef.Name] = token
|
||||
}
|
||||
|
||||
token.Source = v.pdb_source_id
|
||||
|
||||
token.Kind = db.OriginalDatatypeToken
|
||||
|
||||
var token_name db.TokenName
|
||||
token_name.Kind = db.OriginalName
|
||||
token_name.Name = typedef.Name
|
||||
|
||||
if !slices.Contains(token.Names, token_name) {
|
||||
token.Names = append(token.Names, token_name)
|
||||
}
|
||||
|
||||
token.Datatype = typedef.Basetype
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_all(pdb *pdbconv.ProgramDatabase) (err error) {
|
||||
for _, class := range pdb.Classes {
|
||||
if err = v.visit_class(&class); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, datatype := range pdb.Datatypes {
|
||||
if err = v.visit_datatype(&datatype); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, enum := range pdb.Enums {
|
||||
if err = v.visit_enum(&enum); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, function := range pdb.Functions {
|
||||
if err = v.visit_function(&function); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, table := range pdb.Tables {
|
||||
if err = v.visit_table(&table); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, typedef := range pdb.Typedefs {
|
||||
if err = v.visit_typedef(&typedef); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) write_tokens() (err error) {
|
||||
datatypes := slices.Collect(maps.Keys(v.datatypes))
|
||||
sort.Strings(datatypes)
|
||||
symbols := slices.Collect(maps.Keys(v.symbols))
|
||||
slices.SortFunc(symbols, func(a, b uint64) int {
|
||||
if a < b {
|
||||
return -1
|
||||
} else if a == b {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
strings := slices.Collect(maps.Keys(v.strings))
|
||||
sort.Strings(strings)
|
||||
for _, datatype := range datatypes {
|
||||
if err = v.tokens_database.Write(v.datatypes[datatype]); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
for _, symbol := range symbols {
|
||||
if err = v.tokens_database.Write(v.symbols[symbol]); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
for _, string := range strings {
|
||||
if err = v.tokens_database.Write(v.strings[string]); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
for _, constant := range v.constants {
|
||||
if err = v.tokens_database.Write(&constant); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
189
go/app/util/query.go
Normal file
189
go/app/util/query.go
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
package util
|
||||
|
||||
import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"regexp"
	"slices"

	"github.com/parquet-go/parquet-go"
	"github.com/thunderbrewhq/binana/go/app"
	"github.com/thunderbrewhq/binana/go/db"
)
|
||||
|
||||
// QueryPresentationMode selects how matched tokens are printed.
type QueryPresentationMode uint8

const (
	// PresentQueryNormal prints the full token record: kind, source
	// sample, section, optional offset/datatype, and all names.
	PresentQueryNormal QueryPresentationMode = iota
	// PresentQueryNameOnly prints only the matching names, one per line.
	PresentQueryNameOnly
)

// QueryParams describes one token search over the token/sample databases.
type QueryParams struct {
	// Presentation mode for matched tokens.
	Present QueryPresentationMode
	// Match pattern for profile
	Profile string
	// Possible values for Program
	Program []string
	// Possible values for OS
	OS []string
	// Possible values for arch
	Arch []string
	// Range of builds to return information for
	MinBuild uint32
	MaxBuild uint32
	// Regular expression for tokens (symbols/type information)
	Token string
}

// token_query carries the state of a single query run.
type token_query struct {
	params *QueryParams
	// sample_database maps sample ID to its metadata, loaded from
	// db/samples.parquet.
	sample_database map[string]db.Sample
	// token_regexp is the POSIX regexp compiled from params.Token.
	token_regexp *regexp.Regexp
}
|
||||
|
||||
// present_token writes a matched token to stdout.
//
// In PresentQueryNameOnly mode only the names matching the token regexp
// are printed, one per line. Otherwise a multi-line record is printed:
// kind, source sample, section, optional offset/datatype, and every name.
// Tokens with an unrecognized kind produce no output.
func (token_query *token_query) present_token(token *db.Token) {
	if token_query.params.Present == PresentQueryNameOnly {
		for _, name := range token.Names {
			if token_query.token_regexp.MatchString(name.Name) {
				fmt.Println(name.Name)
			}
		}
		return
	}
	kind_name := ""
	switch token.Kind {
	case db.OriginalConstantToken:
		kind_name = "original constant"
	case db.OriginalDatatypeToken:
		kind_name = "original datatype"
	case db.OriginalStringToken:
		kind_name = "original string"
	case db.OriginalSymbolToken:
		kind_name = "original symbol"
	default:
		// unknown token kinds are silently skipped
		return
	}
	// NOTE(review): token.Source[:8] assumes the source ID is at least 8
	// characters (presumably a hash); a shorter ID would panic — confirm.
	fmt.Printf("%s in sample: '%s' section: '%s'", kind_name, token.Source[:8], token.Section)
	if token.Offset != "" {
		fmt.Printf(" at %s", token.Offset)
	}
	if token.Datatype != "" {
		fmt.Printf(" with datatype: '%s'", token.Datatype)
	}
	fmt.Printf("\n")
	fmt.Printf("names:\n")

	for _, name := range token.Names {
		name_kind_name := ""
		switch name.Kind {
		case db.OriginalName:
			name_kind_name = "original name"
		case db.DemangledName:
			name_kind_name = "demangled name"
		case db.BinanaizedName:
			name_kind_name = "binanaized name"
		default:
			// a name kind outside the known set is a database bug
			panic(name.Kind)
		}

		fmt.Printf("%s '%s'\n", name_kind_name, name.Name)
	}

	// record separator
	fmt.Printf("--\n\n")
}
|
||||
|
||||
// attempt to match token and report to stdout
|
||||
// if returns quit = true, the search is halted
|
||||
func (token_query *token_query) match_token(token *db.Token) (quit bool) {
|
||||
matched := false
|
||||
sample, ok := token_query.sample_database[token.Source]
|
||||
if !ok {
|
||||
m, err := json.Marshal(token)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
fmt.Fprintln(os.Stderr, string(m))
|
||||
app.Fatal(fmt.Errorf("a token references a sample (%s) that does not exist in the sample database. please fix your database", token.Source))
|
||||
return
|
||||
}
|
||||
// filter out tokens from samples we don't care about
|
||||
if sample.Build < token_query.params.MinBuild || sample.Build > token_query.params.MaxBuild {
|
||||
return
|
||||
}
|
||||
if len(token_query.params.Program) > 0 {
|
||||
if !slices.Contains(token_query.params.Program, sample.Program) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(token_query.params.OS) > 0 {
|
||||
if !slices.Contains(token_query.params.OS, sample.OS) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(token_query.params.Arch) > 0 {
|
||||
if !slices.Contains(token_query.params.Arch, sample.Arch) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, name := range token.Names {
|
||||
if token_query.token_regexp.MatchString(name.Name) {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if matched {
|
||||
token_query.present_token(token)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (token_query *token_query) load_sample_database() (err error) {
|
||||
token_query.sample_database = make(map[string]db.Sample)
|
||||
|
||||
var samples []db.Sample
|
||||
samples, err = parquet.ReadFile[db.Sample]("db/samples.parquet")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, sample := range samples {
|
||||
token_query.sample_database[sample.ID] = sample
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func Query(params *QueryParams) {
|
||||
var token_query token_query
|
||||
token_query.params = params
|
||||
token_query.token_regexp = regexp.MustCompilePOSIX(token_query.params.Token)
|
||||
|
||||
if err := token_query.load_sample_database(); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
tokens_db_file, err := os.Open("db/tokens.parquet")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
rows := make([]db.Token, 1024)
|
||||
reader := parquet.NewGenericReader[db.Token](tokens_db_file)
|
||||
read_loop:
|
||||
for {
|
||||
n, err := reader.Read(rows)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
for _, token := range rows[:n] {
|
||||
if token_query.match_token(&token) {
|
||||
break read_loop
|
||||
}
|
||||
}
|
||||
}
|
||||
reader.Close()
|
||||
tokens_db_file.Close()
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue