mirror of
https://github.com/thunderbrewhq/binana.git
synced 2026-03-22 22:00:13 +00:00
This commit is contained in:
parent
ac268a16c8
commit
2c2815ab0b
22 changed files with 2122 additions and 2 deletions
3
db/samples.parquet
Normal file
3
db/samples.parquet
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f19fac1fbb4db2383995a0285a30e1826e567e4198e35137ac773e0bad516401
|
||||
size 6011
|
||||
3
db/tokens.parquet
Normal file
3
db/tokens.parquet
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8c31761b8675ce1fee186061dc84c184b5eb23ec92368230abbf611ffc9143f7
|
||||
size 156789790
|
||||
12
go.mod
12
go.mod
|
|
@ -4,18 +4,26 @@ go 1.25.5
|
|||
|
||||
require (
|
||||
github.com/fatih/color v1.18.0
|
||||
github.com/pierrec/lz4/v4 v4.1.21
|
||||
github.com/pierrec/lz4/v4 v4.1.26
|
||||
github.com/spf13/cobra v1.8.1
|
||||
modernc.org/cc/v3 v3.41.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/andybalholm/brotli v1.2.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/klauspost/compress v1.18.4 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/parquet-go/bitpack v1.0.0 // indirect
|
||||
github.com/parquet-go/jsonlite v1.5.0 // indirect
|
||||
github.com/parquet-go/parquet-go v0.29.0 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
golang.org/x/sys v0.25.0 // indirect
|
||||
github.com/twpayne/go-geom v1.6.1 // indirect
|
||||
golang.org/x/sys v0.42.0 // indirect
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
lukechampine.com/uint128 v1.3.0 // indirect
|
||||
modernc.org/mathutil v1.6.0 // indirect
|
||||
modernc.org/strutil v1.2.0 // indirect
|
||||
|
|
|
|||
21
go.sum
21
go.sum
|
|
@ -1,3 +1,5 @@
|
|||
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
|
||||
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
|
|
@ -5,15 +7,27 @@ github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
|
|||
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
|
||||
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
|
||||
github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
|
||||
github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/parquet-go/bitpack v1.0.0 h1:AUqzlKzPPXf2bCdjfj4sTeacrUwsT7NlcYDMUQxPcQA=
|
||||
github.com/parquet-go/bitpack v1.0.0/go.mod h1:XnVk9TH+O40eOOmvpAVZ7K2ocQFrQwysLMnc6M/8lgs=
|
||||
github.com/parquet-go/jsonlite v1.5.0 h1:ulS7lNWdPwiqDMLzTiXHYmIUhu99mavZh2iAVdXet3g=
|
||||
github.com/parquet-go/jsonlite v1.5.0/go.mod h1:nDjpkpL4EOtqs6NQugUsi0Rleq9sW/OtC1NnZEnxzF0=
|
||||
github.com/parquet-go/parquet-go v0.29.0 h1:xXlPtFVR51jpSVzf+cgHnNIcb7Xet+iuvkbe0HIm90Y=
|
||||
github.com/parquet-go/parquet-go v0.29.0/go.mod h1:navtkAYr2LGoJVp141oXPlO/sxLvaOe3la2JEoD8+rg=
|
||||
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
|
||||
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||
github.com/pierrec/lz4/v4 v4.1.26 h1:GrpZw1gZttORinvzBdXPUXATeqlJjqUG/D87TKMnhjY=
|
||||
github.com/pierrec/lz4/v4 v4.1.26/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
|
|
@ -21,10 +35,17 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
|
|||
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/twpayne/go-geom v1.6.1 h1:iLE+Opv0Ihm/ABIcvQFGIiFBXd76oBIar9drAwHFhR4=
|
||||
github.com/twpayne/go-geom v1.6.1/go.mod h1:Kr+Nly6BswFsKM5sd31YaoWS5PeDDH2NftJTK7Gd028=
|
||||
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
|
||||
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
lukechampine.com/uint128 v1.3.0 h1:cDdUVfRwDUDovz610ABgFD17nXD4/uDgVHl2sC3+sbo=
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@ import (
|
|||
_ "github.com/thunderbrewhq/binana/go/app/cmd/add_symbol"
|
||||
_ "github.com/thunderbrewhq/binana/go/app/cmd/lint"
|
||||
_ "github.com/thunderbrewhq/binana/go/app/cmd/make"
|
||||
_ "github.com/thunderbrewhq/binana/go/app/cmd/make_samples"
|
||||
_ "github.com/thunderbrewhq/binana/go/app/cmd/make_tokens"
|
||||
_ "github.com/thunderbrewhq/binana/go/app/cmd/query"
|
||||
"github.com/thunderbrewhq/binana/go/app/cmd/root"
|
||||
_ "github.com/thunderbrewhq/binana/go/app/cmd/tidy"
|
||||
|
||||
|
|
|
|||
68
go/app/cmd/make_samples/make-samples.go
Normal file
68
go/app/cmd/make_samples/make-samples.go
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
package make_samples
|
||||
|
||||
import (
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/thunderbrewhq/binana/go/app"
|
||||
"github.com/thunderbrewhq/binana/go/app/cmd/root"
|
||||
"github.com/thunderbrewhq/binana/go/app/util"
|
||||
"github.com/thunderbrewhq/binana/go/app/util/dbutil"
|
||||
)
|
||||
|
||||
var make_samples_cmd = cobra.Command{
|
||||
Use: "make-samples",
|
||||
Run: run_make_samples_command,
|
||||
}
|
||||
|
||||
func init() {
|
||||
f := make_samples_cmd.Flags()
|
||||
f.StringP("source", "s", "", "required: source tree of sample binaries")
|
||||
f.StringP("output-file", "o", "", "write the database to a file")
|
||||
f.StringSlice("direct-mirror", nil, "list of direct mirror URLs that already contain the sample binaries")
|
||||
f.StringSlice("ipfs-gateway", nil, "list of IPFS gateways")
|
||||
f.StringP("format", "f", "json", "the format of the output database [json|parquet]")
|
||||
root.RootCmd.AddCommand(&make_samples_cmd)
|
||||
}
|
||||
|
||||
func run_make_samples_command(cmd *cobra.Command, args []string) {
|
||||
f := cmd.Flags()
|
||||
var (
|
||||
params util.MakeSampleDatabaseParams
|
||||
err error
|
||||
format string
|
||||
)
|
||||
params.Source, err = f.GetString("source")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
if params.Source == "" {
|
||||
cmd.Help()
|
||||
return
|
||||
}
|
||||
params.Output, err = f.GetString("output-file")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
format, err = f.GetString("format")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
switch format {
|
||||
case "json":
|
||||
params.Format = dbutil.DatabaseJSON
|
||||
case "parquet":
|
||||
params.Format = dbutil.DatabaseParquet
|
||||
default:
|
||||
app.Fatal("unknown format", format)
|
||||
}
|
||||
|
||||
params.DirectMirrors, err = f.GetStringSlice("direct-mirror")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
params.IPFSGateways, err = f.GetStringSlice("ipfs-gateway")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
util.MakeSampleDatabase(¶ms)
|
||||
}
|
||||
57
go/app/cmd/make_tokens/make-tokens.go
Normal file
57
go/app/cmd/make_tokens/make-tokens.go
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
package make_tokens
|
||||
|
||||
import (
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/thunderbrewhq/binana/go/app"
|
||||
"github.com/thunderbrewhq/binana/go/app/cmd/root"
|
||||
"github.com/thunderbrewhq/binana/go/app/util"
|
||||
"github.com/thunderbrewhq/binana/go/app/util/dbutil"
|
||||
)
|
||||
|
||||
var make_tokens_cmd = cobra.Command{
|
||||
Use: "make-tokens",
|
||||
Run: run_make_tokens_command,
|
||||
}
|
||||
|
||||
func init() {
|
||||
f := make_tokens_cmd.Flags()
|
||||
f.StringP("source", "s", "", "required: source tree of sample binaries")
|
||||
f.StringP("output-file", "o", "", "write the database to a file")
|
||||
f.StringP("format", "f", "json", "the format of the output database [json|parquet]")
|
||||
root.RootCmd.AddCommand(&make_tokens_cmd)
|
||||
}
|
||||
|
||||
func run_make_tokens_command(cmd *cobra.Command, args []string) {
|
||||
f := cmd.Flags()
|
||||
var (
|
||||
params util.MakeTokenDatabaseParams
|
||||
err error
|
||||
format string
|
||||
)
|
||||
params.Source, err = f.GetString("source")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
if params.Source == "" {
|
||||
cmd.Help()
|
||||
return
|
||||
}
|
||||
params.Output, err = f.GetString("output-file")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
format, err = f.GetString("format")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
switch format {
|
||||
case "json":
|
||||
params.Format = dbutil.DatabaseJSON
|
||||
case "parquet":
|
||||
params.Format = dbutil.DatabaseParquet
|
||||
default:
|
||||
app.Fatal("unknown format", format)
|
||||
}
|
||||
|
||||
util.MakeTokenDatabase(¶ms)
|
||||
}
|
||||
72
go/app/cmd/query/query.go
Normal file
72
go/app/cmd/query/query.go
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
package query
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/thunderbrewhq/binana/go/app"
|
||||
"github.com/thunderbrewhq/binana/go/app/cmd/root"
|
||||
"github.com/thunderbrewhq/binana/go/app/util"
|
||||
)
|
||||
|
||||
var query_cmd = cobra.Command{
|
||||
Use: "q regexp",
|
||||
Args: cobra.MinimumNArgs(1),
|
||||
Short: "query the token database for information",
|
||||
Run: run_query_cmd,
|
||||
}
|
||||
|
||||
func init() {
|
||||
f := query_cmd.Flags()
|
||||
f.Uint32("min-build", 0, "the minimum build to return tokens for")
|
||||
f.Uint32("max-build", math.MaxUint32, "the maximum build to return tokens for")
|
||||
f.StringSlice("program", nil, "a list of programs to return tokens for")
|
||||
f.StringSlice("os", nil, "a list of kernel names to return tokens for (windows, darwin, linux)")
|
||||
f.StringSlice("arch", nil, "a list of CPU architectures to return tokens for (ppc, 386, amd64)")
|
||||
f.String("present", "normal", "control the way tokens are presented to console (normal, name-only)")
|
||||
root.RootCmd.AddCommand(&query_cmd)
|
||||
}
|
||||
|
||||
func run_query_cmd(cmd *cobra.Command, args []string) {
|
||||
f := cmd.Flags()
|
||||
var (
|
||||
params util.QueryParams
|
||||
err error
|
||||
presentation_mode string
|
||||
)
|
||||
params.MinBuild, err = f.GetUint32("min-build")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
params.MaxBuild, err = f.GetUint32("max-build")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
params.Program, err = f.GetStringSlice("program")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
params.OS, err = f.GetStringSlice("os")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
params.Arch, err = f.GetStringSlice("arch")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
presentation_mode, err = f.GetString("present")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch presentation_mode {
|
||||
case "normal":
|
||||
params.Present = util.PresentQueryNormal
|
||||
case "name-only":
|
||||
params.Present = util.PresentQueryNameOnly
|
||||
default:
|
||||
cmd.Help()
|
||||
return
|
||||
}
|
||||
params.Token = args[0]
|
||||
util.Query(¶ms)
|
||||
}
|
||||
14
go/app/util/dbutil/format.go
Normal file
14
go/app/util/dbutil/format.go
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
package dbutil
|
||||
|
||||
import "errors"
|
||||
|
||||
type DatabaseFormat uint8
|
||||
|
||||
const (
|
||||
DatabaseParquet DatabaseFormat = iota
|
||||
DatabaseJSON
|
||||
)
|
||||
|
||||
var (
|
||||
ErrUnknownDatabaseFormat = errors.New("dbutil: unknown database format")
|
||||
)
|
||||
80
go/app/util/dbutil/writer.go
Normal file
80
go/app/util/dbutil/writer.go
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
package dbutil
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/parquet-go/parquet-go"
|
||||
)
|
||||
|
||||
type Writer[T any] struct {
|
||||
write func([]T) (err error)
|
||||
close func() (err error)
|
||||
}
|
||||
|
||||
func (writer *Writer[T]) WriteEntries(entries []T) (err error) {
|
||||
err = writer.write(entries)
|
||||
return
|
||||
}
|
||||
|
||||
func (writer *Writer[T]) Close() (err error) {
|
||||
err = writer.close()
|
||||
return
|
||||
}
|
||||
|
||||
func Open[T any](name string, format DatabaseFormat) (writer *Writer[T], err error) {
|
||||
writer = new(Writer[T])
|
||||
var (
|
||||
file *os.File
|
||||
output io.Writer
|
||||
)
|
||||
if name == "" {
|
||||
output = os.Stdout
|
||||
} else {
|
||||
file, err = os.Create(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
output = file
|
||||
}
|
||||
switch format {
|
||||
|
||||
case DatabaseJSON:
|
||||
encoder := json.NewEncoder(output)
|
||||
writer.write = func(entries []T) (err error) {
|
||||
for _, entry := range entries {
|
||||
if err = encoder.Encode(&entry); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
writer.close = func() (err error) {
|
||||
if file != nil {
|
||||
err = file.Close()
|
||||
}
|
||||
return
|
||||
}
|
||||
case DatabaseParquet:
|
||||
generic_writer := parquet.NewGenericWriter[T](output)
|
||||
writer.write = func(entries []T) (err error) {
|
||||
_, err = generic_writer.Write(entries)
|
||||
return
|
||||
}
|
||||
writer.close = func() (err error) {
|
||||
if err = generic_writer.Close(); err != nil {
|
||||
return
|
||||
}
|
||||
if file != nil {
|
||||
err = file.Close()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
default:
|
||||
err = fmt.Errorf("%w: %d", ErrUnknownDatabaseFormat, format)
|
||||
}
|
||||
return
|
||||
}
|
||||
49
go/app/util/demangle.go
Normal file
49
go/app/util/demangle.go
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var demangle_cache = make(map[string]string)
|
||||
|
||||
func demangle(str string) (demangled string, err error) {
|
||||
var ok bool
|
||||
if demangled, ok = demangle_cache[str]; ok {
|
||||
return
|
||||
}
|
||||
|
||||
var output bytes.Buffer
|
||||
c := exec.Command("demumble", str)
|
||||
c.Stdout = &output
|
||||
c.Run()
|
||||
if output.Len() == 0 {
|
||||
err = fmt.Errorf("cannot demangle")
|
||||
return
|
||||
}
|
||||
demangled = output.String()
|
||||
demangled = strings.TrimSuffix(demangled, "\n")
|
||||
if str == demangled {
|
||||
err = fmt.Errorf("cannot demangle")
|
||||
return
|
||||
}
|
||||
demangle_cache[str] = demangled
|
||||
return
|
||||
}
|
||||
|
||||
func looks_mangled(str string) bool {
|
||||
if strings.HasPrefix(str, ".") {
|
||||
return true
|
||||
} else if strings.HasPrefix(str, "?") {
|
||||
return true
|
||||
} else if strings.HasPrefix(str, "_Z") {
|
||||
return true
|
||||
} else if len(str) > 0 && str[0] >= '0' && str[0] <= '9' {
|
||||
return true
|
||||
} else if strings.ContainsAny(str, "0123456789") && !strings.Contains(str, " ") {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
31
go/app/util/exe.go
Normal file
31
go/app/util/exe.go
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"debug/pe"
|
||||
"os"
|
||||
)
|
||||
|
||||
func get_exe_base_address(name string) (base_address uint64, err error) {
|
||||
var file *os.File
|
||||
file, err = os.Open(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
var (
|
||||
pe_file *pe.File
|
||||
)
|
||||
pe_file, err = pe.NewFile(file)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
base_address = uint64(0x400000)
|
||||
|
||||
switch h := pe_file.OptionalHeader.(type) {
|
||||
case *pe.OptionalHeader32:
|
||||
base_address = uint64(h.ImageBase)
|
||||
case *pe.OptionalHeader64:
|
||||
base_address = h.ImageBase
|
||||
}
|
||||
file.Close()
|
||||
return
|
||||
}
|
||||
19
go/app/util/hash.go
Normal file
19
go/app/util/hash.go
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"os"
|
||||
)
|
||||
|
||||
func hash_file(name string) (id string, err error) {
|
||||
var b []byte
|
||||
b, err = os.ReadFile(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
h := sha256.New()
|
||||
h.Write(b[:])
|
||||
id = hex.EncodeToString(h.Sum(nil))
|
||||
return
|
||||
}
|
||||
224
go/app/util/make-samples.go
Normal file
224
go/app/util/make-samples.go
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/thunderbrewhq/binana/go/app"
|
||||
"github.com/thunderbrewhq/binana/go/app/util/dbutil"
|
||||
"github.com/thunderbrewhq/binana/go/db"
|
||||
)
|
||||
|
||||
type MakeSampleDatabaseParams struct {
|
||||
// A file name that corresponds to a tree of sample files.
|
||||
// Anything in this tree will be collected into the sample database
|
||||
Source string
|
||||
|
||||
// The name of the file to write the database to
|
||||
Output string
|
||||
|
||||
// Sets the format of the database file
|
||||
Format dbutil.DatabaseFormat
|
||||
|
||||
// URLs that maps to the root of the sample tree hierarchy.
|
||||
// Used to generate a list of mirror URLs for sample binaries
|
||||
DirectMirrors []string
|
||||
|
||||
// List of IPFS Gateway URLs
|
||||
// If not empty, a CID for the sample tree will be created,
|
||||
// Actually uploading anything in the sample tree, however,
|
||||
// is outside the scope of this tool
|
||||
IPFSGateways []string
|
||||
}
|
||||
|
||||
type sample_database struct {
|
||||
writer *dbutil.Writer[db.Sample]
|
||||
ipfs_tree_cid string
|
||||
buffer []db.Sample
|
||||
}
|
||||
|
||||
func (sample_database *sample_database) add(sample db.Sample) (err error) {
|
||||
sample_database.buffer = append(sample_database.buffer, sample)
|
||||
return
|
||||
}
|
||||
|
||||
func (sample_database *sample_database) Close() (err error) {
|
||||
if err = sample_database.writer.WriteEntries(sample_database.buffer); err != nil {
|
||||
return
|
||||
}
|
||||
err = sample_database.writer.Close()
|
||||
return
|
||||
}
|
||||
|
||||
func (sample_database *sample_database) make_sample_file(params *MakeSampleDatabaseParams, name, relative_name string) (err error) {
|
||||
var sample db.Sample
|
||||
// infer mime-type from extension
|
||||
switch filepath.Ext(name) {
|
||||
case ".exe":
|
||||
sample.MimeType = "application/vnd.microsoft.portable-executable"
|
||||
case ".pdb":
|
||||
sample.MimeType = "application/x-ms-pdb"
|
||||
// associate the PDB with its EXE
|
||||
sample_exe_name := strings.TrimSuffix(name, ".pdb") + ".exe"
|
||||
if _, err = os.Stat(sample_exe_name); err == nil {
|
||||
sample.Executable, err = hash_file(sample_exe_name)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
case ".macho":
|
||||
sample.MimeType = "application/x-mach-binary"
|
||||
case ".elf":
|
||||
sample.MimeType = "application/x-executable"
|
||||
default:
|
||||
// don't care about this
|
||||
return
|
||||
}
|
||||
|
||||
sample.ID, err = hash_file(name)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// get the base filename
|
||||
base_name := filepath.Base(name)
|
||||
|
||||
// split the base filename without its extension
|
||||
filename_components := strings.Split(strings.TrimSuffix(base_name, filepath.Ext(base_name)), "-")
|
||||
// now, parse the filename (these must be correctly named!)
|
||||
sample.Program = filename_components[0]
|
||||
sample.Version = filename_components[1]
|
||||
var build uint64
|
||||
build, err = strconv.ParseUint(filename_components[2], 0, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
sample.Build = uint32(build)
|
||||
sample.OS = filename_components[3]
|
||||
sample.Arch = filename_components[4]
|
||||
|
||||
// now, create various mirrors
|
||||
for _, direct_mirror := range params.DirectMirrors {
|
||||
sample.Mirrors = append(sample.Mirrors, db.SampleMirror{
|
||||
Kind: db.MirrorDirect,
|
||||
URL: direct_mirror + relative_name,
|
||||
})
|
||||
}
|
||||
for _, ipfs_gateway := range params.IPFSGateways {
|
||||
sample.Mirrors = append(sample.Mirrors, db.SampleMirror{
|
||||
Kind: db.MirrorIPFS,
|
||||
URL: ipfs_gateway + "/" + sample_database.ipfs_tree_cid + relative_name,
|
||||
})
|
||||
}
|
||||
|
||||
// now write the sample
|
||||
|
||||
if err = sample_database.add(sample); err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (sample_database *sample_database) make_tree(params *MakeSampleDatabaseParams, name, relative_name string) (err error) {
|
||||
var (
|
||||
tree_entries []os.DirEntry
|
||||
)
|
||||
|
||||
tree_entries, err = os.ReadDir(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, tree_entry := range tree_entries {
|
||||
if tree_entry.IsDir() {
|
||||
if err = sample_database.make_tree(params, name+"/"+tree_entry.Name(), relative_name+"/"+tree_entry.Name()); err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
if err = sample_database.make_sample_file(params, name+"/"+tree_entry.Name(), relative_name+"/"+tree_entry.Name()); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func ipfs_generate_file_cid(name string) (cid string, err error) {
|
||||
|
||||
// todo
|
||||
// use command:
|
||||
// ipfs add -qr --only-hash .
|
||||
// inside the root of the sample tree
|
||||
// the last CID is the root of the tree
|
||||
|
||||
var (
|
||||
wd string
|
||||
)
|
||||
wd, err = os.Getwd()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = os.Chdir(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
command := exec.Command("ipfs", "add", "-qr", "--only-hash", ".")
|
||||
var command_output bytes.Buffer
|
||||
command.Stdout = &command_output
|
||||
command.Run()
|
||||
if command.ProcessState.ExitCode() != 0 {
|
||||
os.Chdir(wd)
|
||||
err = fmt.Errorf("util: ipfs tool exited: %d", command.ProcessState.ExitCode())
|
||||
return
|
||||
}
|
||||
|
||||
// Parse command Output
|
||||
command_output_scanner := bufio.NewScanner(&command_output)
|
||||
|
||||
for command_output_scanner.Scan() {
|
||||
cid = command_output_scanner.Text()
|
||||
}
|
||||
|
||||
err = os.Chdir(wd)
|
||||
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
func MakeSampleDatabase(params *MakeSampleDatabaseParams) {
|
||||
var (
|
||||
err error
|
||||
sample_database sample_database
|
||||
)
|
||||
|
||||
// if we want to generate IPFS links, start by getting the CID for the sample tree
|
||||
if len(params.IPFSGateways) != 0 {
|
||||
sample_database.ipfs_tree_cid, err = ipfs_generate_file_cid(params.Source)
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
sample_database.writer, err = dbutil.Open[db.Sample](params.Output, params.Format)
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
// make the root tree, with our params, the source as the first tree, and "" (root) as the relative path
|
||||
if err = sample_database.make_tree(params, params.Source, ""); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
if err = sample_database.Close(); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
}
|
||||
303
go/app/util/make-tokens.go
Normal file
303
go/app/util/make-tokens.go
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"debug/macho"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/thunderbrewhq/binana/go/app"
|
||||
"github.com/thunderbrewhq/binana/go/app/util/dbutil"
|
||||
"github.com/thunderbrewhq/binana/go/db"
|
||||
"github.com/thunderbrewhq/binana/go/pdbconv"
|
||||
"github.com/thunderbrewhq/binana/go/stringrecovery"
|
||||
)
|
||||
|
||||
type MakeTokenDatabaseParams struct {
|
||||
Source string
|
||||
Output string
|
||||
Format dbutil.DatabaseFormat
|
||||
}
|
||||
|
||||
func MakeTokenDatabase(params *MakeTokenDatabaseParams) {
|
||||
var (
|
||||
tokens_database tokens_database
|
||||
err error
|
||||
)
|
||||
if err = tokens_database.Open(params.Output, params.Format); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
if err = tokens_database.make(params.Source); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
if err = tokens_database.Close(); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
type tokens_database struct {
|
||||
sequence uint64
|
||||
writer *dbutil.Writer[db.Token]
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) next_token_id() (id uint64) {
|
||||
id = tokens_database.sequence
|
||||
tokens_database.sequence++
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) Open(name string, format dbutil.DatabaseFormat) (err error) {
|
||||
tokens_database.sequence = 1
|
||||
tokens_database.writer, err = dbutil.Open[db.Token](name, format)
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) Close() (err error) {
|
||||
err = tokens_database.writer.Close()
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) Write(token *db.Token) (err error) {
|
||||
tokens := []db.Token{*token}
|
||||
if err = tokens_database.writer.WriteEntries(tokens); err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_file_pdb(name string) (err error) {
|
||||
exe_name := strings.TrimSuffix(name, ".pdb") + ".exe"
|
||||
var base_address uint64
|
||||
base_address, err = get_exe_base_address(exe_name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintln(os.Stderr, "[pdb]", name)
|
||||
var source_id string
|
||||
source_id, err = hash_file(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
fmt.Fprintln(os.Stderr, "[pdb]", source_id)
|
||||
// check for the existence of an alternate, .pdb.json.gz file
|
||||
_, err = os.Stat(name + ".json.gz")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
gzip_file *os.File
|
||||
gzip_reader *gzip.Reader
|
||||
)
|
||||
gzip_file, err = os.Open(name + ".json.gz")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var pdb pdbconv.ProgramDatabase
|
||||
gzip_reader, err = gzip.NewReader(gzip_file)
|
||||
json_decoder := json.NewDecoder(gzip_reader)
|
||||
|
||||
if err = json_decoder.Decode(&pdb); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
gzip_file.Close()
|
||||
|
||||
var v pdb_token_visitor
|
||||
v.init(tokens_database, source_id, base_address)
|
||||
if err = v.visit_all(&pdb); err != nil {
|
||||
return
|
||||
}
|
||||
if err = v.write_tokens(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) write_string_token(source_id string, section_name string, address uint64, str string) (err error) {
|
||||
var db_token db.Token
|
||||
db_token.ID = tokens_database.next_token_id()
|
||||
db_token.Source = source_id
|
||||
db_token.Section = section_name
|
||||
db_token.Kind = db.OriginalStringToken
|
||||
db_token.Offset = fmt.Sprintf("%X", address)
|
||||
|
||||
db_token.Names = append(db_token.Names, db.TokenName{db.OriginalName, str})
|
||||
|
||||
// detect if this is a mangled type identifier
|
||||
if looks_mangled(str) {
|
||||
demangled, err := demangle(str)
|
||||
if err == nil {
|
||||
db_token.Names = append(db_token.Names, db.TokenName{db.DemangledName, demangled})
|
||||
}
|
||||
}
|
||||
|
||||
err = tokens_database.Write(&db_token)
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_file_pe(name string) (err error) {
|
||||
var id string
|
||||
id, err = hash_file(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) {
|
||||
fmt.Fprintf(os.Stderr, "[pe] string found: %s %08X %s\n", section_name, address, str)
|
||||
tokens_database.write_string_token(id, section_name, address, str)
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_file_macho(name string) (err error) {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", name)
|
||||
var source_id string
|
||||
source_id, err = hash_file(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", source_id)
|
||||
var (
|
||||
file *os.File
|
||||
macho_file *macho.File
|
||||
)
|
||||
file, err = os.Open(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
macho_file, err = macho.NewFile(file)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_, dwarf_err := macho_file.DWARF()
|
||||
if dwarf_err == nil {
|
||||
fmt.Fprintln(os.Stderr, "DWARF!")
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "cpu", macho_file.FileHeader.Cpu)
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "loads:")
|
||||
// for _, load := range macho_file.Loads {
|
||||
// fmt.Fprintln(os.Stderr, "[mach-o]", load.String())
|
||||
// }
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "sections:")
|
||||
for _, section := range macho_file.Sections {
|
||||
fmt.Fprintln(os.Stderr, "section", section.SectionHeader.Name)
|
||||
}
|
||||
|
||||
if macho_file.Dysymtab != nil {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a dysymtab")
|
||||
} else {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "dysymtab:")
|
||||
}
|
||||
|
||||
var imported_symbols []string
|
||||
imported_symbols, err = macho_file.ImportedSymbols()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if macho_file.Symtab == nil {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "does not contain a symtab")
|
||||
} else {
|
||||
fmt.Fprintln(os.Stderr, "[mach-o]", "symtab:")
|
||||
for _, sym := range macho_file.Symtab.Syms {
|
||||
imported := slices.Contains(imported_symbols, sym.Name)
|
||||
var section_name string
|
||||
if sym.Sect != 0 {
|
||||
section_name = macho_file.Sections[sym.Sect-1].SectionHeader.Name
|
||||
}
|
||||
if imported {
|
||||
fmt.Fprintf(os.Stderr, "[mach-o] imported %s %02x %s\n", section_name, sym.Type, sym.Name)
|
||||
} else {
|
||||
if sym.Name == "" {
|
||||
// fmt.Fprintln(os.Stderr, "[mach-o]", "symbol has no name", "sect="+section_name, sym.Type, sym.Value, sym.Desc)
|
||||
} else {
|
||||
fmt.Fprintf(os.Stderr, "[mach-o] internal %s %02x %s\n", section_name, sym.Type, sym.Name)
|
||||
var token db.Token
|
||||
token.ID = tokens_database.next_token_id()
|
||||
token.Source = source_id
|
||||
token.Kind = db.OriginalSymbolToken
|
||||
token.Section = section_name
|
||||
token.Offset = fmt.Sprintf("%X", sym.Value)
|
||||
token.Names = append(token.Names, db.TokenName{db.OriginalName, sym.Name})
|
||||
|
||||
if looks_mangled(sym.Name) {
|
||||
demangled, err := demangle(sym.Name)
|
||||
if err == nil {
|
||||
token.Names = append(token.Names, db.TokenName{db.DemangledName, demangled})
|
||||
}
|
||||
}
|
||||
|
||||
tokens_database.Write(&token)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
file.Close()
|
||||
|
||||
if err = stringrecovery.RecoverFile(name, func(section_name string, address uint64, str string) {
|
||||
fmt.Fprintf(os.Stderr, "[mach-o] string found: %s %08X %s\n", section_name, address, str)
|
||||
tokens_database.write_string_token(source_id, section_name, address, str)
|
||||
}); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_file(name string) (err error) {
|
||||
switch filepath.Ext(name) {
|
||||
case ".macho":
|
||||
err = tokens_database.make_file_macho(name)
|
||||
case ".pdb":
|
||||
err = tokens_database.make_file_pdb(name)
|
||||
case ".exe":
|
||||
err = tokens_database.make_file_pe(name)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make_directory(name string) (err error) {
|
||||
var entries []os.DirEntry
|
||||
entries, err = os.ReadDir(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
if err = tokens_database.make_directory(filepath.Join(name, entry.Name())); err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
if err = tokens_database.make_file(filepath.Join(name, entry.Name())); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (tokens_database *tokens_database) make(name string) (err error) {
|
||||
var fi os.FileInfo
|
||||
fi, err = os.Stat(name)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if fi.IsDir() {
|
||||
err = tokens_database.make_directory(name)
|
||||
} else {
|
||||
err = tokens_database.make_file(name)
|
||||
}
|
||||
return
|
||||
}
|
||||
474
go/app/util/pdb.go
Normal file
474
go/app/util/pdb.go
Normal file
|
|
@ -0,0 +1,474 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"maps"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"github.com/thunderbrewhq/binana/go/db"
|
||||
"github.com/thunderbrewhq/binana/go/pdbconv"
|
||||
)
|
||||
|
||||
// pdb_token_visitor accumulates tokens extracted from a single PDB file.
// Datatypes, symbols and strings are deduplicated in maps while visiting and
// flushed in deterministic order afterwards (see write_tokens).
type pdb_token_visitor struct {
	tokens_database *tokens_database

	// location of the base module
	base_address uint64
	// SHA-256 id of the PDB sample the tokens originate from
	pdb_source_id string
	// string tokens, keyed by the literal string contents
	strings map[string]*db.Token
	// datatype tokens, keyed by type name (or content hash for enums)
	datatypes map[string]*db.Token
	// maps a symbol to a token
	symbols map[uint64]*db.Token

	// constants carry no address, so they cannot be keyed for dedup;
	// they are simply collected in discovery order
	constants []db.Token
}
|
||||
|
||||
func (v *pdb_token_visitor) init(tokens_database *tokens_database, pdb_source_id string, base_address uint64) {
|
||||
v.base_address = base_address
|
||||
v.pdb_source_id = pdb_source_id
|
||||
v.strings = make(map[string]*db.Token)
|
||||
v.datatypes = make(map[string]*db.Token)
|
||||
// map of address to token
|
||||
v.symbols = make(map[uint64]*db.Token)
|
||||
v.tokens_database = tokens_database
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_class(class *pdbconv.Class) (err error) {
|
||||
token, ok := v.datatypes[class.Name]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.datatypes[class.Name] = token
|
||||
}
|
||||
|
||||
// set token source to pdb
|
||||
token.Source = v.pdb_source_id
|
||||
// kind is debug information token
|
||||
token.Kind = db.OriginalDatatypeToken
|
||||
|
||||
// set original name
|
||||
token.Names = append(token.Names, db.TokenName{db.OriginalName, class.Name})
|
||||
|
||||
// set the basic type
|
||||
token.Keyword = "class"
|
||||
|
||||
for _, member := range class.Members {
|
||||
var token_member db.TokenMember
|
||||
if member.Kind == "Member" {
|
||||
token_member.Kind = db.FieldMember
|
||||
} else if member.Kind == "Unknown" && member.Datatype == "void *" {
|
||||
token_member.Kind = db.MethodMember
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
token_member.Key = member.Name
|
||||
token_member.Value = member.Datatype
|
||||
|
||||
if !slices.Contains(token.Members, token_member) {
|
||||
token.Members = append(token.Members, token_member)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_datatype(datatype *pdbconv.Datatype) (err error) {
|
||||
token, ok := v.datatypes[datatype.Name]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.datatypes[datatype.Name] = token
|
||||
}
|
||||
|
||||
// set token source to pdb
|
||||
token.Source = v.pdb_source_id
|
||||
// kind is debug information token
|
||||
token.Kind = db.OriginalDatatypeToken
|
||||
|
||||
// set original name
|
||||
token.Names = append(token.Names, db.TokenName{db.OriginalName, datatype.Name})
|
||||
|
||||
// set the basic type
|
||||
if datatype.Kind == "Structure" {
|
||||
token.Keyword = "struct"
|
||||
} else if datatype.Kind == "Union" {
|
||||
token.Keyword = "union"
|
||||
} else {
|
||||
err = fmt.Errorf("unhandled datatype kind '%s'", datatype.Kind)
|
||||
return
|
||||
}
|
||||
|
||||
for _, member := range datatype.Members {
|
||||
var token_member db.TokenMember
|
||||
if member.Kind == "Member" {
|
||||
token_member.Kind = db.FieldMember
|
||||
} else if member.Kind == "Unknown" && member.Datatype == "void *" {
|
||||
token_member.Kind = db.MethodMember
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
token_member.Key = member.Name
|
||||
token_member.Value = member.Datatype
|
||||
|
||||
if !slices.Contains(token.Members, token_member) {
|
||||
token.Members = append(token.Members, token_member)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// visit_function records a function from the PDB as a symbol token keyed by
// its rebased address. Source-file paths referenced by its line numbers are
// mined as string tokens, and stack variables become token members.
// NOTE(review): names and members are appended without dedup here; repeated
// visits of the same address would accumulate duplicates — confirm whether
// functions are guaranteed unique per address.
func (v *pdb_token_visitor) visit_function(function *pdbconv.Function) (err error) {
	var address uint64
	address, err = strconv.ParseUint(function.Address, 0, 64)
	if err != nil {
		return
	}

	// rebase onto the module's load address
	address = v.base_address + address

	token, ok := v.symbols[address]
	if !ok {
		token = new(db.Token)
		v.symbols[address] = token
	}

	// set token source to pdb
	token.Source = v.pdb_source_id
	// kind is symbol information token
	token.Kind = db.OriginalSymbolToken
	// set address
	token.Offset = fmt.Sprintf("%X", address)
	// set original name
	token.Names = append(token.Names, db.TokenName{db.OriginalName, function.Name})

	// best effort: a demangling failure simply leaves only the mangled name
	if looks_mangled(function.Name) {
		demangled, demangler_err := demangle(function.Name)
		if demangler_err == nil {
			token.Names = append(token.Names, db.TokenName{db.DemangledName, demangled})
		}
	}

	// visit source files
	for _, line_number := range function.LineNumbers {
		if err = v.visit_string(line_number.SourceFile); err != nil {
			return
		}
	}

	// classify stack variables as members
	for _, stack_variable := range function.StackVariables {
		var token_member db.TokenMember
		if stack_variable.Kind == "Parameter" || stack_variable.Kind == "ObjectPointer" {
			token_member.Kind = db.ParameterMember
		} else if stack_variable.Kind == "Local" {
			token_member.Kind = db.LocalMember
		} else if stack_variable.Kind == "StaticLocal" {
			token_member.Kind = db.StaticLocalMember
		} else if stack_variable.Kind == "Constant" {
			// these are repeated elsewhere
			continue
		} else {
			err = fmt.Errorf("unhandled stack variable kind '%s'", stack_variable.Kind)
			return
		}

		token_member.Key = stack_variable.Name
		token_member.Value = stack_variable.Datatype

		token.Members = append(token.Members, token_member)
	}

	return
}
|
||||
|
||||
// visit_enum records an enum from the PDB as an original-datatype token.
// Because enums may be anonymous ("__unnamed"), the dedup key is a SHA-256
// hash of the enum's name and member list rather than the name itself.
func (v *pdb_token_visitor) visit_enum(enum *pdbconv.Enum) (err error) {
	// create hash of enum's contents and use to key the datatype
	h := sha256.New()
	h.Write([]byte(enum.Name))
	for _, member := range enum.Members {
		h.Write([]byte(member.Name))
		h.Write([]byte(fmt.Sprintf("%d", member.Value)))
	}
	name := hex.EncodeToString(h.Sum(nil))

	token, ok := v.datatypes[name]
	if !ok {
		token = new(db.Token)
		v.datatypes[name] = token
	}

	token.Source = v.pdb_source_id

	token.Keyword = "enum"

	// apply name (may be __unnamed)
	// NOTE(review): repeated visits of an identical enum append the name
	// again; members below are deduplicated but names are not — confirm
	// whether duplicates are acceptable here.
	token.Names = append(token.Names, db.TokenName{db.OriginalName, enum.Name})

	// this is an original datatype
	token.Kind = db.OriginalDatatypeToken

	for _, member := range enum.Members {
		var token_member db.TokenMember
		token_member.Kind = db.EnumMember
		token_member.Key = member.Name
		token_member.Value = fmt.Sprintf("%d", member.Value)

		if !slices.Contains(token.Members, token_member) {
			token.Members = append(token.Members, token_member)
		}
	}

	return
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_string(s string) (err error) {
|
||||
token, ok := v.strings[s]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.strings[s] = token
|
||||
}
|
||||
|
||||
// apply source
|
||||
token.Source = v.pdb_source_id
|
||||
|
||||
// this is a string token
|
||||
token.Kind = db.OriginalStringToken
|
||||
|
||||
// add name
|
||||
var token_name db.TokenName
|
||||
token_name.Kind = db.OriginalName
|
||||
token_name.Name = s
|
||||
token.Names = []db.TokenName{token_name}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_source_files_table(table *pdbconv.Table) (err error) {
|
||||
for _, source_file := range table.SourceFiles {
|
||||
if err = v.visit_string(source_file.Name); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_constant(symbol *pdbconv.TableSymbol) (err error) {
|
||||
var token db.Token
|
||||
token.ID = v.tokens_database.next_token_id()
|
||||
token.Source = v.pdb_source_id
|
||||
token.Keyword = "const"
|
||||
token.Datatype = symbol.Datatype
|
||||
|
||||
if symbol.Name != "" {
|
||||
var name db.TokenName
|
||||
name.Kind = db.OriginalName
|
||||
name.Name = symbol.Name
|
||||
token.Names = append(token.Names, name)
|
||||
}
|
||||
|
||||
if symbol.Undecorated != "" {
|
||||
var name db.TokenName
|
||||
name.Kind = db.OriginalName
|
||||
name.Name = symbol.Undecorated
|
||||
token.Names = append(token.Names, name)
|
||||
}
|
||||
|
||||
var value db.TokenMember
|
||||
value.Kind = db.ConstantValueMember
|
||||
value.Value = symbol.Value
|
||||
|
||||
token.Members = append(token.Members, value)
|
||||
|
||||
v.constants = append(v.constants, token)
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_table_symbol(symbol *pdbconv.TableSymbol) (err error) {
|
||||
if symbol.Address == "0x0" {
|
||||
if symbol.Value != "" && symbol.Kind == "Constant" {
|
||||
err = v.visit_constant(symbol)
|
||||
return
|
||||
}
|
||||
|
||||
// so, this does not correspond to an actual symbol.
|
||||
// we can still mine it for string tokens.
|
||||
if symbol.Name != "" {
|
||||
if err = v.visit_string(symbol.Name); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if symbol.Undecorated != "" {
|
||||
if err = v.visit_string(symbol.Name); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// this corresponds to an address
|
||||
// compute the real address
|
||||
var address uint64
|
||||
address, err = strconv.ParseUint(symbol.Address, 0, 64)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
address = v.base_address + address
|
||||
|
||||
token, ok := v.symbols[address]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.symbols[address] = token
|
||||
}
|
||||
token.Source = v.pdb_source_id
|
||||
|
||||
if symbol.Datatype != "" {
|
||||
token.Datatype = symbol.Datatype
|
||||
}
|
||||
|
||||
if symbol.Kind == "FileStatic" {
|
||||
token.Keyword = "static"
|
||||
} else if symbol.Kind == "Global" {
|
||||
token.Keyword = "global"
|
||||
}
|
||||
|
||||
if symbol.Name != "" {
|
||||
token_name := db.TokenName{db.OriginalName, symbol.Name}
|
||||
if !slices.Contains(token.Names, token_name) {
|
||||
token.Names = append(token.Names, token_name)
|
||||
}
|
||||
}
|
||||
|
||||
if symbol.Undecorated != "" {
|
||||
undecorated := db.TokenName{db.OriginalName, symbol.Undecorated}
|
||||
if !slices.Contains(token.Names, undecorated) {
|
||||
token.Names = append(token.Names, undecorated)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_symbols_table(table *pdbconv.Table) (err error) {
|
||||
for _, symbol := range table.Symbols {
|
||||
if err = v.visit_table_symbol(&symbol); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_table(table *pdbconv.Table) (err error) {
|
||||
if table.Name == "SourceFiles" {
|
||||
err = v.visit_source_files_table(table)
|
||||
} else if table.Name == "Symbols" {
|
||||
err = v.visit_symbols_table(table)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (v *pdb_token_visitor) visit_typedef(typedef *pdbconv.Typedef) (err error) {
|
||||
token, ok := v.datatypes[typedef.Name]
|
||||
if !ok {
|
||||
token = new(db.Token)
|
||||
v.datatypes[typedef.Name] = token
|
||||
}
|
||||
|
||||
token.Source = v.pdb_source_id
|
||||
|
||||
token.Kind = db.OriginalDatatypeToken
|
||||
|
||||
var token_name db.TokenName
|
||||
token_name.Kind = db.OriginalName
|
||||
token_name.Name = typedef.Name
|
||||
|
||||
if !slices.Contains(token.Names, token_name) {
|
||||
token.Names = append(token.Names, token_name)
|
||||
}
|
||||
|
||||
token.Datatype = typedef.Basetype
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// visit_all walks every section of the converted PDB — classes, datatypes,
// enums, functions, tables, and typedefs — accumulating tokens in the
// visitor's maps. It stops at the first error.
func (v *pdb_token_visitor) visit_all(pdb *pdbconv.ProgramDatabase) (err error) {
	for _, class := range pdb.Classes {
		if err = v.visit_class(&class); err != nil {
			return
		}
	}

	for _, datatype := range pdb.Datatypes {
		if err = v.visit_datatype(&datatype); err != nil {
			return
		}
	}

	for _, enum := range pdb.Enums {
		if err = v.visit_enum(&enum); err != nil {
			return
		}
	}

	for _, function := range pdb.Functions {
		if err = v.visit_function(&function); err != nil {
			return
		}
	}

	for _, table := range pdb.Tables {
		if err = v.visit_table(&table); err != nil {
			return
		}
	}

	for _, typedef := range pdb.Typedefs {
		if err = v.visit_typedef(&typedef); err != nil {
			return
		}
	}

	return
}
|
||||
|
||||
func (v *pdb_token_visitor) write_tokens() (err error) {
|
||||
datatypes := slices.Collect(maps.Keys(v.datatypes))
|
||||
sort.Strings(datatypes)
|
||||
symbols := slices.Collect(maps.Keys(v.symbols))
|
||||
slices.SortFunc(symbols, func(a, b uint64) int {
|
||||
if a < b {
|
||||
return -1
|
||||
} else if a == b {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
strings := slices.Collect(maps.Keys(v.strings))
|
||||
sort.Strings(strings)
|
||||
for _, datatype := range datatypes {
|
||||
if err = v.tokens_database.Write(v.datatypes[datatype]); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
for _, symbol := range symbols {
|
||||
if err = v.tokens_database.Write(v.symbols[symbol]); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
for _, string := range strings {
|
||||
if err = v.tokens_database.Write(v.strings[string]); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
for _, constant := range v.constants {
|
||||
if err = v.tokens_database.Write(&constant); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
189
go/app/util/query.go
Normal file
189
go/app/util/query.go
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
package util
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"io"
	"os"
	"regexp"
	"slices"

	"github.com/parquet-go/parquet-go"
	"github.com/thunderbrewhq/binana/go/app"
	"github.com/thunderbrewhq/binana/go/db"
)
|
||||
|
||||
// QueryPresentationMode selects how matched tokens are printed to stdout.
type QueryPresentationMode uint8

const (
	// PresentQueryNormal prints the full token record: kind, sample,
	// section, offset, datatype and all names.
	PresentQueryNormal QueryPresentationMode = iota
	// PresentQueryNameOnly prints only the matching names, one per line.
	PresentQueryNameOnly
)
|
||||
|
||||
// QueryParams describes one token search over the sample and token databases.
// Empty filter slices and a zero build range bound mean "no restriction" for
// that dimension (see match_token).
type QueryParams struct {
	// How matched tokens should be presented
	Present QueryPresentationMode
	// Match pattern for profile
	Profile string
	// Possible values for Program
	Program []string
	// Possible values for OS
	OS []string
	// Possible values for arch
	Arch []string
	// Range of builds to return information for
	MinBuild uint32
	MaxBuild uint32
	// Regular expression for tokens (symbols/type information)
	Token string
}
|
||||
|
||||
// token_query carries the state for one Query run: the caller's parameters,
// the in-memory sample index, and the compiled token pattern.
type token_query struct {
	params *QueryParams
	// sample metadata indexed by sample id (SHA-256), loaded up front
	sample_database map[string]db.Sample
	// POSIX regular expression compiled from params.Token
	token_regexp *regexp.Regexp
}
|
||||
|
||||
func (token_query *token_query) present_token(token *db.Token) {
|
||||
if token_query.params.Present == PresentQueryNameOnly {
|
||||
for _, name := range token.Names {
|
||||
if token_query.token_regexp.MatchString(name.Name) {
|
||||
fmt.Println(name.Name)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
kind_name := ""
|
||||
switch token.Kind {
|
||||
case db.OriginalConstantToken:
|
||||
kind_name = "original constant"
|
||||
case db.OriginalDatatypeToken:
|
||||
kind_name = "original datatype"
|
||||
case db.OriginalStringToken:
|
||||
kind_name = "original string"
|
||||
case db.OriginalSymbolToken:
|
||||
kind_name = "original symbol"
|
||||
default:
|
||||
return
|
||||
}
|
||||
fmt.Printf("%s in sample: '%s' section: '%s'", kind_name, token.Source[:8], token.Section)
|
||||
if token.Offset != "" {
|
||||
fmt.Printf(" at %s", token.Offset)
|
||||
}
|
||||
if token.Datatype != "" {
|
||||
fmt.Printf(" with datatype: '%s'", token.Datatype)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
fmt.Printf("names:\n")
|
||||
|
||||
for _, name := range token.Names {
|
||||
name_kind_name := ""
|
||||
switch name.Kind {
|
||||
case db.OriginalName:
|
||||
name_kind_name = "original name"
|
||||
case db.DemangledName:
|
||||
name_kind_name = "demangled name"
|
||||
case db.BinanaizedName:
|
||||
name_kind_name = "binanaized name"
|
||||
default:
|
||||
panic(name.Kind)
|
||||
}
|
||||
|
||||
fmt.Printf("%s '%s'\n", name_kind_name, name.Name)
|
||||
}
|
||||
|
||||
fmt.Printf("--\n\n")
|
||||
}
|
||||
|
||||
// attempt to match token and report to stdout
|
||||
// if returns quit = true, the search is halted
|
||||
func (token_query *token_query) match_token(token *db.Token) (quit bool) {
|
||||
matched := false
|
||||
sample, ok := token_query.sample_database[token.Source]
|
||||
if !ok {
|
||||
m, err := json.Marshal(token)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
fmt.Fprintln(os.Stderr, string(m))
|
||||
app.Fatal(fmt.Errorf("a token references a sample (%s) that does not exist in the sample database. please fix your database", token.Source))
|
||||
return
|
||||
}
|
||||
// filter out tokens from samples we don't care about
|
||||
if sample.Build < token_query.params.MinBuild || sample.Build > token_query.params.MaxBuild {
|
||||
return
|
||||
}
|
||||
if len(token_query.params.Program) > 0 {
|
||||
if !slices.Contains(token_query.params.Program, sample.Program) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(token_query.params.OS) > 0 {
|
||||
if !slices.Contains(token_query.params.OS, sample.OS) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(token_query.params.Arch) > 0 {
|
||||
if !slices.Contains(token_query.params.Arch, sample.Arch) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, name := range token.Names {
|
||||
if token_query.token_regexp.MatchString(name.Name) {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if matched {
|
||||
token_query.present_token(token)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (token_query *token_query) load_sample_database() (err error) {
|
||||
token_query.sample_database = make(map[string]db.Sample)
|
||||
|
||||
var samples []db.Sample
|
||||
samples, err = parquet.ReadFile[db.Sample]("db/samples.parquet")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, sample := range samples {
|
||||
token_query.sample_database[sample.ID] = sample
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func Query(params *QueryParams) {
|
||||
var token_query token_query
|
||||
token_query.params = params
|
||||
token_query.token_regexp = regexp.MustCompilePOSIX(token_query.params.Token)
|
||||
|
||||
if err := token_query.load_sample_database(); err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
tokens_db_file, err := os.Open("db/tokens.parquet")
|
||||
if err != nil {
|
||||
app.Fatal(err)
|
||||
}
|
||||
|
||||
rows := make([]db.Token, 1024)
|
||||
reader := parquet.NewGenericReader[db.Token](tokens_db_file)
|
||||
read_loop:
|
||||
for {
|
||||
n, err := reader.Read(rows)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
for _, token := range rows[:n] {
|
||||
if token_query.match_token(&token) {
|
||||
break read_loop
|
||||
}
|
||||
}
|
||||
}
|
||||
reader.Close()
|
||||
tokens_db_file.Close()
|
||||
}
|
||||
47
go/db/sample.go
Normal file
47
go/db/sample.go
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
package db
|
||||
|
||||
// MirrorKind identifies how a sample mirror URL is to be fetched.
type MirrorKind uint8

const (
	// MirrorDirect — presumably a directly downloadable URL; TODO confirm
	MirrorDirect MirrorKind = iota
	// MirrorIPFS — presumably an IPFS content address; TODO confirm
	MirrorIPFS
)

// SampleMirror is one location a sample file can be downloaded from.
type SampleMirror struct {
	Kind MirrorKind `json:"kind" parquet:"kind"`
	URL string `json:"url" parquet:"url,delta"`
}

// Sample is one row of db/samples.parquet: the metadata describing a single
// analyzed binary or debugging file.
type Sample struct {
	// The SHA-256 sum of the sample file
	ID string `json:"id" parquet:"id,dict"`

	// <Optional> if this is a debugging file, then this is a SHA-256 sum
	// which references another sample file, which is the executable file
	Executable string `json:"exe,omitempty" parquet:"exe,dict"`

	// This is the MIME type identifier of the sample file.
	// Possible sample types include:
	// * (Windows .exe) application/vnd.microsoft.portable-executable
	// * (Mach-O binary) application/x-mach-binary
	// * (Linux binary) application/x-elf
	MimeType string `json:"mimetype" parquet:"mimetype,dict"`

	// This is the code that signifies which program the sample is a build of.
	Program string `json:"program" parquet:"program,dict"`

	// <Optional> This is the build sequence of the sample e.g. 12340
	Build uint32 `json:"build,omitempty" parquet:"build"`

	// <Optional> This is the semantic version/release id of the sample e.g. 3.3.5a
	Version string `json:"version,omitempty" parquet:"version"`

	// The OS of the sample, uses GOOS naming convention
	OS string `json:"os" parquet:"os,dict"`

	// The architecture of the sample, uses GOARCH naming convention
	Arch string `json:"arch" parquet:"arch,dict"`

	// A URL where the sample can be downloaded
	Mirrors []SampleMirror `json:"mirrors,omitempty" parquet:"mirrors"`
}
|
||||
79
go/db/token.go
Normal file
79
go/db/token.go
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
package db
|
||||
|
||||
// Discriminator types for tokens, their names, and their members.
type (
	TokenKind uint8
	TokenNameKind uint8
	TokenMemberKind uint8
)

const (
	// OriginalName means this string appeared verbatim in the original sample, and was not altered.
	OriginalName TokenNameKind = iota
	// DemangledName is provided in the case that the OriginalName was mangled by the compiler.
	DemangledName
	// Mangled names can be automatically Binanaized, i.e. converted into a naive syntax for wide
	// compatibility with SRE tools
	BinanaizedName
)

const (
	// The token was obtained from a PDB or a Mach-O symtab
	OriginalSymbolToken TokenKind = iota
	// The token was found by scanning the non-executable sections of the binary for 0-terminated ASCII strings
	OriginalStringToken
	// The token is a datatype was obtained from a PDB or DWARF debugging file
	OriginalDatatypeToken
	// This token is a constant named value with no address
	OriginalConstantToken
)

const (
	// The literal value of a constant token (key unused)
	ConstantValueMember TokenMemberKind = iota
	// key = enumerator name, value = its numeric value (decimal string)
	EnumMember
	// This is a part of a struct
	// key = the field name
	// value = the C type of the field
	FieldMember
	// This is a method of a class
	MethodMember
	// This is an argument to a function
	ParameterMember
	// This is a local variable in a function
	LocalMember
	// This is a statically declared variable in a function
	StaticLocalMember
)

// TokenName is one of possibly several names attached to a token.
type TokenName struct {
	Kind TokenNameKind `json:"kind" parquet:"kind"`
	Name string `json:"name" parquet:"name,dict"`
}

// TokenMember is a key/value sub-entry of a token (struct field, enum
// value, function parameter, ...), discriminated by Kind.
type TokenMember struct {
	Kind TokenMemberKind `json:"kind" parquet:"kind"`
	Key string `json:"key,omitempty" parquet:"key,dict"`
	Value string `json:"value" parquet:"value,dict"`
}

// Token is one row of db/tokens.parquet: a single piece of recovered
// information (symbol, string, datatype, or constant) tied to a sample.
type Token struct {
	// Unique 64-bit identifier
	ID uint64 `json:"id" parquet:"id"`
	// The SHA-256 hash id of the sample which generated the token
	Source string `json:"src" parquet:"src,dict"`
	// The color and subhead of the token
	Kind TokenKind `json:"kind" parquet:"kind"`
	// If this is a datatype, keyword tells you what kind of datatype it is. Useful when generating C code.
	Keyword string `json:"keyword,omitempty" parquet:"keyword,dict"`
	// If this is a global variable/constant, this tells you the data type
	Datatype string `json:"datatype,omitempty" parquet:"datatype,dict"`
	// The section where the token originated.
	// NOTE(review): Section and Offset lack parquet struct tags, unlike every
	// other field — the parquet column names will differ; confirm this is
	// intentional before adding tags (existing db files depend on it).
	Section string `json:"section,omitempty"`
	// The offset (in hexadecimal) where the symbol resides in the sample
	Offset string `json:"offset,omitempty"`
	// Alternate names for the token
	Names []TokenName `json:"names"`
	// Clickable references to other tokens
	Highlights []string `json:"crumbs,omitempty"`
	// Struct/Enum members
	Members []TokenMember `json:"members,omitempty"`
}
|
||||
109
go/pdbconv/db.go
Normal file
109
go/pdbconv/db.go
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
package pdbconv
|
||||
|
||||
// ClassMember is one field or method entry of a Class.
type ClassMember struct {
	Datatype string `json:"datatype,omitempty"`
	Kind string `json:"kind,omitempty"`
	Length uint64 `json:"length,omitempty"`
	Name string `json:"name,omitempty"`
	Offset uint64 `json:"offset,omitempty"`
}

// Class is a C++ class recovered from the PDB.
type Class struct {
	Length string `json:"length,omitempty"`
	Name string `json:"name,omitempty"`
	Members []ClassMember `json:"member,omitempty"`
}

// DatatypeMember is one member entry of a Datatype (struct/union).
type DatatypeMember struct {
	Datatype string `json:"datatype,omitempty"`
	Kind string `json:"kind,omitempty"`
	Length uint64 `json:"length,omitempty"`
	Name string `json:"name,omitempty"`
	Offset uint64 `json:"offset,omitempty"`
}

// Datatype is a struct or union recovered from the PDB; Kind distinguishes
// the two (see visit_datatype in go/app/util/pdb.go).
type Datatype struct {
	Kind string `json:"kind,omitempty"`
	Length string `json:"length,omitempty"`
	Name string `json:"name,omitempty"`
	Members []DatatypeMember `json:"member,omitempty"`
}

// EnumMember is a single enumerator name/value pair.
type EnumMember struct {
	Name string `json:"name,omitempty"`
	Value int `json:"value,omitempty"`
}

// Enum is an enumeration recovered from the PDB.
type Enum struct {
	Length uint64 `json:"length,omitempty"`
	Name string `json:"name,omitempty"`
	Type string `json:"type,omitempty"`
	Members []EnumMember `json:"member,omitempty"`
}

// FunctionLineNumber maps a range of a function to a source file.
type FunctionLineNumber struct {
	Address string `json:"addr,omitempty"`
	End int `json:"end,omitempty"`
	Length int `json:"length,omitempty"`
	SourceFile string `json:"source_file,omitempty"`
	Start int `json:"start,omitempty"`
}

// FunctionStackVariable is a parameter, local, or static local of a function.
type FunctionStackVariable struct {
	Datatype string `json:"datatype,omitempty"`
	Kind string `json:"kind,omitempty"`
	Length uint64 `json:"length,omitempty"`
	Name string `json:"name,omitempty"`
	Offset uint64 `json:"offset,omitempty"`
}

// Function is a function symbol together with its line-number table and
// stack variables.
type Function struct {
	Address string `json:"address,omitempty"`
	Length uint64 `json:"length,omitempty"`
	Name string `json:"name,omitempty"`
	LineNumbers []FunctionLineNumber `json:"line_numbers,omitempty"`
	StackVariables []FunctionStackVariable `json:"stack_variables,omitempty"`
}

// TableSegment is one segment entry of a PDB table.
type TableSegment struct {
	Address string `json:"address,omitempty"`
	Number int `json:"number,omitempty"`
}

// TableSourceFile is one source-file entry of a PDB table.
type TableSourceFile struct {
	ID string `json:"id,omitempty"`
	Name string `json:"name,omitempty"`
}

// TableSymbol is one symbol entry of a PDB table. Address "0x0" means the
// entry has no location (see visit_table_symbol in go/app/util/pdb.go).
type TableSymbol struct {
	Address string `json:"address,omitempty"`
	Datatype string `json:"datatype,omitempty"`
	Index uint64 `json:"index,omitempty"`
	Kind string `json:"kind,omitempty"`
	Length uint64 `json:"length,omitempty"`
	Name string `json:"name,omitempty"`
	Tag string `json:"tag,omitempty"`
	Undecorated string `json:"undecorated,omitempty"`
	Value string `json:"value,omitempty"`
}

// Table is a named PDB table; which of the slices is populated depends on
// the table's Name (e.g. "SourceFiles", "Symbols").
type Table struct {
	Name string `json:"name,omitempty"`
	Segments []TableSegment `json:"segments,omitempty"`
	SourceFiles []TableSourceFile `json:"source_files,omitempty"`
	Symbols []TableSymbol `json:"symbols,omitempty"`
}

// Typedef is a type alias: Name aliases Basetype.
type Typedef struct {
	Basetype string `json:"basetype,omitempty"`
	Name string `json:"name,omitempty"`
}

// ProgramDatabase is the converted, JSON-friendly representation of one PDB.
type ProgramDatabase struct {
	Classes []Class `json:"classes,omitempty"`
	Datatypes []Datatype `json:"datatypes,omitempty"`
	Enums []Enum `json:"enums,omitempty"`
	Functions []Function `json:"functions,omitempty"`
	Tables []Table `json:"tables,omitempty"`
	Typedefs []Typedef `json:"typedefs,omitempty"`
}
|
||||
93
go/pdbconv/xml.go
Normal file
93
go/pdbconv/xml.go
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
package pdbconv
|
||||
|
||||
// GhidraXml mirrors the element/attribute layout of a Ghidra PDB XML
// export for decoding with encoding/xml. Each anonymous leaf struct
// corresponds to one element; attributes such as length, offset,
// address and index are decoded as strings here and converted later
// (presumably because the source text may not be plain decimal —
// TODO confirm against the XML producer).
type GhidraXml struct {
	// <classes><class ...><member .../></class></classes>
	Classes struct {
		Class []struct {
			Length string `xml:"length,attr"`
			Name   string `xml:"name,attr"`
			Member []struct {
				Datatype string `xml:"datatype,attr"`
				Kind     string `xml:"kind,attr"`
				Length   string `xml:"length,attr"`
				Name     string `xml:"name,attr"`
				Offset   string `xml:"offset,attr"`
			} `xml:"member"`
		} `xml:"class"`
	} `xml:"classes"`
	// <datatypes><datatype ...><member .../></datatype></datatypes>
	Datatypes struct {
		Datatype []struct {
			Kind   string `xml:"kind,attr"`
			Length string `xml:"length,attr"`
			Name   string `xml:"name,attr"`
			Member []struct {
				Datatype string `xml:"datatype,attr"`
				Kind     string `xml:"kind,attr"`
				Length   string `xml:"length,attr"`
				Name     string `xml:"name,attr"`
				Offset   string `xml:"offset,attr"`
			} `xml:"member"`
		} `xml:"datatype"`
	} `xml:"datatypes"`
	// <enums><enum ...><member name=... value=.../></enum></enums>
	Enums struct {
		Enum []struct {
			Length string `xml:"length,attr"`
			Name   string `xml:"name,attr"`
			Type   string `xml:"type,attr"`
			Member []struct {
				Name  string `xml:"name,attr"`
				Value int    `xml:"value,attr"`
			} `xml:"member"`
		} `xml:"enum"`
	} `xml:"enums"`
	// <functions><function ...> with nested <line_number> and
	// <stack_variable> records.
	Functions struct {
		Function []struct {
			Address    string `xml:"address,attr"`
			Length     string `xml:"length,attr"`
			Name       string `xml:"name,attr"`
			LineNumber []struct {
				Addr       string `xml:"addr,attr"`
				End        int    `xml:"end,attr"`
				Length     int    `xml:"length,attr"`
				SourceFile string `xml:"source_file,attr"`
				Start      int    `xml:"start,attr"`
			} `xml:"line_number"`
			StackVariable []struct {
				Datatype string `xml:"datatype,attr"`
				Kind     string `xml:"kind,attr"`
				Length   string `xml:"length,attr"`
				Name     string `xml:"name,attr"`
				Offset   string `xml:"offset,attr"`
			} `xml:"stack_variable"`
		} `xml:"function"`
	} `xml:"functions"`
	// <tables><table ...> with <segment>, <source_file> and <symbol>
	// children.
	Tables struct {
		Table []struct {
			Name    string `xml:"name,attr"`
			Segment []struct {
				Address string `xml:"address,attr"`
				Number  int    `xml:"number,attr"`
			} `xml:"segment"`
			SourceFile []struct {
				ID   string `xml:"id,attr"`
				Name string `xml:"name,attr"`
			} `xml:"source_file"`
			Symbol []struct {
				Address     string `xml:"address,attr"`
				Datatype    string `xml:"datatype,attr"`
				Index       string `xml:"index,attr"`
				Kind        string `xml:"kind,attr"`
				Length      string `xml:"length,attr"`
				Name        string `xml:"name,attr"`
				Tag         string `xml:"tag,attr"`
				Undecorated string `xml:"undecorated,attr"`
				Value       string `xml:"value,attr"`
			} `xml:"symbol"`
		} `xml:"table"`
	} `xml:"tables"`
	// <typedefs><typedef basetype=... name=.../></typedefs>
	Typedefs struct {
		Typedef []struct {
			Basetype string `xml:"basetype,attr"`
			Name     string `xml:"name,attr"`
		} `xml:"typedef"`
	} `xml:"typedefs"`
}
|
||||
174
go/stringrecovery/stringrecovery.go
Normal file
174
go/stringrecovery/stringrecovery.go
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
package stringrecovery
|
||||
|
||||
import (
|
||||
"debug/macho"
|
||||
"debug/pe"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
	// charset_english lists the bytes accepted as characters of a
	// recoverable string. NOTE: this is a raw (backtick) string, so
	// `\'` and `\\` contribute literal backslashes, not escapes; with
	// the appended backtick the set covers every printable ASCII byte
	// (0x20–0x7E), with the backslash listed more than once (harmless,
	// since it is used as a set).
	charset_english = ` !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_abcdefghijklmnopqrstuvwxyz{|}~` + "`"
	// lookup_table_english[b] reports whether byte b belongs to
	// charset_english; populated once by init.
	lookup_table_english [256]bool
)
|
||||
|
||||
type (
	// Callback receives each recovered string: the name of the section
	// or segment it was found in, the virtual address at which the
	// string starts, and the string's text.
	Callback func(segment_name string, address uint64, token string)
)
|
||||
|
||||
func init() {
|
||||
for _, c := range charset_english {
|
||||
lookup_table_english[c] = true
|
||||
}
|
||||
}
|
||||
|
||||
func recover_section(try_align bool, word_size uint64, minimum_length int, section_name string, virtual_address uint64, section_reader io.ReaderAt, callback Callback) (err error) {
|
||||
var (
|
||||
offset int64
|
||||
)
|
||||
|
||||
var (
|
||||
current_token strings.Builder
|
||||
current_token_offset int64
|
||||
)
|
||||
|
||||
for {
|
||||
var b [1]byte
|
||||
if _, err = section_reader.ReadAt(b[:], offset); err != nil {
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
break
|
||||
}
|
||||
}
|
||||
if b[0] == 0 {
|
||||
// if current_token != "", this is a 0-terminator
|
||||
// emit the token
|
||||
if current_token.Len() > 0 {
|
||||
if current_token.Len() < minimum_length {
|
||||
current_token.Reset()
|
||||
offset++
|
||||
continue
|
||||
}
|
||||
|
||||
align_offset := 0
|
||||
|
||||
if try_align {
|
||||
for i := uint64(current_token_offset); (i % word_size) != 0; i++ {
|
||||
current_token_offset++
|
||||
align_offset++
|
||||
}
|
||||
}
|
||||
|
||||
current_token_string := current_token.String()
|
||||
current_token_string = current_token_string[align_offset:]
|
||||
|
||||
callback(section_name, virtual_address+uint64(current_token_offset), current_token_string)
|
||||
current_token.Reset()
|
||||
}
|
||||
offset++
|
||||
continue
|
||||
}
|
||||
if lookup_table_english[b[0]] {
|
||||
if current_token.Len() == 0 {
|
||||
current_token_offset = offset
|
||||
}
|
||||
current_token.WriteByte(b[0])
|
||||
} else {
|
||||
// discard everything leaing up to this
|
||||
current_token.Reset()
|
||||
}
|
||||
offset++
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func recover_file_macho(word_size uint64, file *os.File, callback Callback) (err error) {
|
||||
var (
|
||||
macho_file *macho.File
|
||||
)
|
||||
macho_file, err = macho.NewFile(file)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, section := range macho_file.Sections {
|
||||
fmt.Fprintln(os.Stderr, "recovering", section.Name)
|
||||
switch section.Name {
|
||||
case "__cstring":
|
||||
if err = recover_section(false, word_size, 1, section.Name, section.Addr, section, callback); err != nil {
|
||||
return
|
||||
}
|
||||
case "__const":
|
||||
if err = recover_section(false, word_size, 4, section.Name, section.Addr, section, callback); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func recover_file_pe(file *os.File, callback Callback) (err error) {
|
||||
var (
|
||||
pe_file *pe.File
|
||||
)
|
||||
pe_file, err = pe.NewFile(file)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
image_base := uint64(0x400000)
|
||||
var word_size uint64
|
||||
|
||||
switch h := pe_file.OptionalHeader.(type) {
|
||||
case *pe.OptionalHeader32:
|
||||
word_size = 4
|
||||
image_base = uint64(h.ImageBase)
|
||||
case *pe.OptionalHeader64:
|
||||
word_size = 8
|
||||
image_base = h.ImageBase
|
||||
}
|
||||
|
||||
for _, section := range pe_file.Sections {
|
||||
fmt.Fprintln(os.Stderr, "recovering", section.Name)
|
||||
switch section.Name {
|
||||
case ".data":
|
||||
if err = recover_section(true, word_size, 4, section.Name, image_base+uint64(section.VirtualAddress), section, callback); err != nil {
|
||||
return
|
||||
}
|
||||
case ".rdata":
|
||||
if err = recover_section(true, word_size, 4, section.Name, image_base+uint64(section.VirtualAddress), section, callback); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func RecoverFile(filename string, callback Callback) (err error) {
|
||||
var file *os.File
|
||||
file, err = os.Open(filename)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
var magic [4]byte
|
||||
if _, err = file.ReadAt(magic[:], 0); err != nil {
|
||||
return
|
||||
}
|
||||
magic_number := binary.LittleEndian.Uint32(magic[:])
|
||||
if magic[0] == 'M' && magic[1] == 'Z' {
|
||||
err = recover_file_pe(file, callback)
|
||||
} else if magic_number == 0xfeedface {
|
||||
err = recover_file_macho(4, file, callback)
|
||||
} else if magic_number == 0xfeedfacf {
|
||||
err = recover_file_macho(8, file, callback)
|
||||
} else if magic_number == 0xcefaedfe {
|
||||
err = recover_file_macho(4, file, callback)
|
||||
} else {
|
||||
err = errors.New("unknown file magic: " + filename)
|
||||
}
|
||||
return
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue