feat(binana): add tokens database
Some checks failed
Push / build (push) Has been cancelled

This commit is contained in:
phaneron 2026-03-20 01:58:16 -04:00
parent ac268a16c8
commit 2c2815ab0b
22 changed files with 2122 additions and 2 deletions

47
go/db/sample.go Normal file
View file

@ -0,0 +1,47 @@
package db
// MirrorKind enumerates the kinds of mirror a SampleMirror can describe.
// It is stored as a single byte; the valid values are the Mirror* constants.
type MirrorKind uint8
// MirrorKind values. The numeric values are assigned by iota, so the order of
// declaration is significant: reordering or inserting entries changes the
// values that are serialized.
const (
// MirrorDirect has the value 0.
// NOTE(review): presumably the mirror URL can be fetched directly (e.g. over HTTP) — confirm.
MirrorDirect MirrorKind = iota
// MirrorIPFS has the value 1.
// NOTE(review): presumably the mirror URL refers to content hosted on IPFS — confirm.
MirrorIPFS
)
// SampleMirror describes one location from which a sample can be downloaded.
// Each field carries both a `json` and a `parquet` struct tag.
type SampleMirror struct {
// Kind tells what kind of mirror this is (see the MirrorKind constants).
Kind MirrorKind `json:"kind" parquet:"kind"`
// URL is the location of the sample on this mirror.
URL string `json:"url" parquet:"url,delta"`
}
// Sample is the database record describing a single sample file: its identity,
// what program/build it belongs to, the platform it targets, and where it can
// be downloaded. Fields marked <Optional> are omitted from JSON output when
// they hold their zero value (via `omitempty`).
type Sample struct {
// The SHA-256 sum of the sample file
ID string `json:"id" parquet:"id,dict"`
// <Optional> If this sample is a debugging file, this is the SHA-256 sum of
// another sample: the executable file that the debugging file belongs to.
Executable string `json:"exe,omitempty" parquet:"exe,dict"`
// This is the MIME type identifier of the sample file.
// Possible sample types include:
// * (Windows .exe) application/vnd.microsoft.portable-executable
// * (Mach-O binary) application/x-mach-binary
// * (Linux binary) application/x-elf
MimeType string `json:"mimetype" parquet:"mimetype,dict"`
// This is the code that signifies which program the sample is a build of.
Program string `json:"program" parquet:"program,dict"`
// <Optional> This is the build sequence of the sample e.g. 12340.
// Because of `omitempty`, a build number of 0 is not written to JSON.
Build uint32 `json:"build,omitempty" parquet:"build"`
// <Optional> This is the semantic version/release id of the sample e.g. 3.3.5a
Version string `json:"version,omitempty" parquet:"version"`
// The OS of the sample, uses GOOS naming convention
OS string `json:"os" parquet:"os,dict"`
// The architecture of the sample, uses GOARCH naming convention
Arch string `json:"arch" parquet:"arch,dict"`
// <Optional> The list of mirrors (kind + URL) where the sample can be downloaded
Mirrors []SampleMirror `json:"mirrors,omitempty" parquet:"mirrors"`
}

79
go/db/token.go Normal file
View file

@ -0,0 +1,79 @@
package db
// Single-byte enumeration types used by Token and its nested records.
type (
// TokenKind classifies a Token as a whole (see Token.Kind).
TokenKind uint8
// TokenNameKind classifies one of a token's names (see TokenName.Kind).
TokenNameKind uint8
// TokenMemberKind classifies one member of a token (see TokenMember.Kind).
TokenMemberKind uint8
)
// TokenNameKind values. The numeric values are assigned by iota, so the order
// of declaration is significant for serialized data.
const (
// OriginalName means this string appeared verbatim in the original sample, and was not altered.
OriginalName TokenNameKind = iota
// DemangledName is provided in the case that the OriginalName was mangled by the compiler.
DemangledName
// BinanaizedName is a name produced by automatically "Binanaizing" a mangled
// name, i.e. converting it into a naive syntax for wide compatibility with
// SRE tools.
BinanaizedName
)
// TokenKind values. The numeric values are assigned by iota, so the order of
// declaration is significant for serialized data.
const (
// OriginalSymbolToken means the token was obtained from a PDB or a Mach-O symtab.
OriginalSymbolToken TokenKind = iota
// OriginalStringToken means the token was found by scanning the non-executable
// sections of the binary for 0-terminated ASCII strings.
OriginalStringToken
// OriginalDatatypeToken means the token is a datatype that was obtained from a
// PDB or DWARF debugging file.
OriginalDatatypeToken
// OriginalConstantToken means the token is a constant named value with no address.
OriginalConstantToken
)
// TokenMemberKind values. The numeric values are assigned by iota, so the
// order of declaration is significant for serialized data.
//
// Each kind determines how TokenMember.Key and TokenMember.Value are to be
// read; only FieldMember documents that mapping explicitly.
const (
// ConstantValueMember has the value 0.
// NOTE(review): presumably carries the value of a constant token — confirm key/value meaning.
ConstantValueMember TokenMemberKind = iota
// EnumMember has the value 1.
// NOTE(review): presumably one enumerator of an enum datatype — confirm key/value meaning.
EnumMember
// FieldMember is a part of a struct.
// key = the field name
// value = the C type of the field
FieldMember
// MethodMember is a method of a class.
MethodMember
// ParameterMember is an argument to a function.
ParameterMember
// LocalMember is a local variable in a function.
LocalMember
// StaticLocalMember is a statically declared variable in a function.
StaticLocalMember
)
// TokenName is one of the names by which a Token is known, together with the
// kind of name it is (original, demangled, or Binanaized).
type TokenName struct {
// Kind tells which form of the name this is (see the TokenNameKind constants).
Kind TokenNameKind `json:"kind" parquet:"kind"`
// Name is the name string itself.
Name string `json:"name" parquet:"name,dict"`
}
// TokenMember is one member attached to a Token, expressed as a key/value pair
// whose interpretation depends on Kind. For example, for FieldMember the key is
// the field name and the value is the C type of the field.
type TokenMember struct {
// Kind tells how Key and Value are to be interpreted (see the TokenMemberKind constants).
Kind TokenMemberKind `json:"kind" parquet:"kind"`
// Key is optional; it is omitted from JSON when empty.
Key string `json:"key,omitempty" parquet:"key,dict"`
// Value is always written to JSON, even when empty.
Value string `json:"value" parquet:"value,dict"`
}
// Token is the database record for a single named item extracted from a
// sample: a symbol, a string, a datatype, or a constant (see the TokenKind
// constants).
//
// NOTE(review): Section, Offset, Names, Highlights and Members carry only a
// `json` tag, while every other field also has a `parquet` tag. Confirm
// whether that is intentional; if these fields are written to Parquet they
// would not use the same lowercase naming as the tagged fields.
type Token struct {
// Unique 64-bit identifier
ID uint64 `json:"id" parquet:"id"`
// The SHA-256 hash id of the sample which generated the token
Source string `json:"src" parquet:"src,dict"`
// The kind of token. Per the original note, this determines the color and
// subhead of the token (presumably when it is displayed — confirm).
Kind TokenKind `json:"kind" parquet:"kind"`
// If this is a datatype, keyword tells you what kind of datatype it is. Useful when generating C code.
Keyword string `json:"keyword,omitempty" parquet:"keyword,dict"`
// If this is a global variable/constant, this tells you the data type
Datatype string `json:"datatype,omitempty" parquet:"datatype,dict"`
// The section where the token originated
Section string `json:"section,omitempty"`
// The offset (in hexadecimal) of the symbol, stored as a string.
// NOTE(review): the original comment was cut off mid-sentence; confirm what
// the offset is relative to (e.g. the section above or the file).
Offset string `json:"offset,omitempty"`
// Alternate names for the token. There is no `omitempty`, so a nil slice is
// written to JSON as null.
Names []TokenName `json:"names"`
// Clickable references to other tokens. Note the JSON key is "crumbs", not
// "highlights".
Highlights []string `json:"crumbs,omitempty"`
// Struct/Enum members
Members []TokenMember `json:"members,omitempty"`
}