mirror of
https://github.com/thunderbrewhq/binana.git
synced 2026-04-27 03:03:52 +00:00
375 lines
14 KiB
Python
375 lines
14 KiB
Python
#Extended Binana symbols importer script
|
|
# @runtime Jython
|
|
# @category Binana
|
|
# @author Thunderbrew
|
|
# @menupath
|
|
# @toolbar logo.png
|
|
|
|
import re
|
|
import string
|
|
|
|
from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
|
|
from ghidra.app.util.parser import FunctionSignatureParser
|
|
from ghidra.util.data import DataTypeParser
|
|
from ghidra.program.model.symbol import SourceType
|
|
from ghidra.program.model.listing import Function, ParameterImpl, VariableStorage
|
|
|
|
def find_storage_parameter(str):
    """Return the name inside the first ``@<name>`` annotation in ``str``.

    For example ``func@<eax>`` yields ``'eax'``. Returns None when ``str``
    has no annotation, or when the annotation is empty (``@<>``).
    """
    found = re.search(r'@<(\w+)?>', str)
    return found.group(1) if found else None
|
|
|
|
def strip_storage_parameter(str):
    """Return ``str`` with every non-empty ``@<...>`` annotation removed."""
    annotation = re.compile(r'@<[^>]+>')
    return annotation.sub('', str)
|
|
|
|
def split_function_parameters(str):
    """Split the parameter list of a function type string.

    Everything up to and including the first '(' is skipped. After that,
    parameters are separated on commas, except that text inside nested
    parentheses (such as a function pointer declarator) stays attached to
    the parameter it belongs to. Scanning stops at the ')' that closes the
    parameter list.

    Returns a list of whitespace-stripped parameter strings. An empty
    parameter list ``func()`` yields ``['']``; a string without any '('
    yields ``[]``.
    """
    inside_list = False   # True once the opening '(' has been seen
    nesting = 0           # depth of parentheses nested inside a parameter
    pending = ''          # text of the parameter being collected
    result = []

    for ch in str:
        if not inside_list:
            # still in the return type / function name portion
            if ch == '(':
                inside_list = True
            continue

        if nesting > 0:
            # inside nested parentheses: copy verbatim, only track depth
            pending += ch
            if ch == '(':
                nesting += 1
            elif ch == ')':
                nesting -= 1
            continue

        if ch == '(':
            nesting = 1
            pending += ch
        elif ch == ',':
            result.append(pending.strip())
            pending = ''
        elif ch == ')':
            # end of the parameter list
            result.append(pending.strip())
            pending = ''
            break
        else:
            pending += ch

    # input ended without a closing ')': keep what was collected
    if pending != '':
        result.append(pending.strip())
    return result
|
|
|
|
def strip_function_type(str):
    """Reduce an annotated function type string to a plain signature.

    Example input: ``int32_t __stdcall func@<eax>(int32_t x@<edi>)``

    Returns a 4-tuple ``(call_conv, this_ptr_type, stripped_type,
    parameter_storage)``:

    * ``call_conv`` -- the last ``__...call`` word found before the word
      starting with ``func``; ``'__stdcall'`` when there is none.
    * ``this_ptr_type`` -- for ``__thiscall``, the first word of the first
      parameter; otherwise ``''``.
    * ``stripped_type`` -- ``<first word> <func id>(<parameters>)`` with the
      ``@<...>`` annotations and ``__return_ptr `` markers removed.
    * ``parameter_storage`` -- dict mapping a position to the name found in
      its ``@<...>`` annotation. Position 0 is the return value; parameters
      are numbered from 1.
    """
    # [ 'int32_t', '__stdcall', 'func@<eax>(int32_t', 'x@<edi>)' ]
    words = str.split(' ')
    # the return type is taken to be the first word only
    return_type = words[0]

    call_conv = '__stdcall'  # default
    func_start = -1
    for position, word in enumerate(words):
        if word.startswith('__'):
            if word.endswith('call'):
                call_conv = word
        elif word.startswith('func'):
            func_start = position
            break

    # func@<eax>(int32_t x@<edi>)
    function_call = ' '.join(words[func_start:])
    # func@<eax>
    func_id = function_call.split('(', 1)[0]
    # [ 'int32_t x@<edi>' ]
    func_parameters = split_function_parameters(function_call)

    parameter_storage = {}
    return_parameter_storage = find_storage_parameter(func_id)
    if return_parameter_storage is not None:
        parameter_storage[0] = return_parameter_storage
        func_id = strip_storage_parameter(func_id)

    this_ptr_type = ''
    plain_parameters = []
    for n, argument_parameter in enumerate(func_parameters, 1):
        argument_parameter = argument_parameter.replace('__return_ptr ', '')

        if n == 1 and call_conv == '__thiscall':
            # Only the type is recorded here; the this pointer deliberately
            # stays in the stripped signature.
            this_ptr_type = argument_parameter.split(' ', 1)[0]

        argument_parameter_storage = find_storage_parameter(argument_parameter)
        if argument_parameter_storage is not None:
            parameter_storage[n] = argument_parameter_storage
            argument_parameter = strip_storage_parameter(argument_parameter)
        plain_parameters.append(argument_parameter)

    stripped_type = return_type + " " + func_id + '(' + ', '.join(plain_parameters) + ')'
    return call_conv, this_ptr_type, stripped_type, parameter_storage
|
|
|
|
def parse_attributes(str):
    """Parse a whitespace-separated attribute list into a dict.

    Three attribute forms are recognised:

    * ``key``           -- boolean attribute, stored as ``True``
    * ``key=value``     -- value runs up to the next whitespace
    * ``key="a value"`` -- quoted value, may contain whitespace

    Fixes over the previous implementation: the quote flag is reset when a
    quoted value closes (it used to stay set, corrupting every following
    attribute), a value that ends exactly at the end of the input is stored
    instead of being dropped, and any whitespace (not only ' ') separates
    attributes, so a trailing newline no longer becomes part of a key or
    value.

    Returns a dict mapping attribute names to ``True`` or to their string
    value.
    """
    SCAN_ATTRIBUTES = 0
    SCAN_KEY = 1
    SCAN_VALUE = 2

    attributes = {}
    current_key = ''
    current_value = ''
    state = SCAN_ATTRIBUTES
    quote = False

    for c in str:
        if state == SCAN_ATTRIBUTES:
            # skip separators until a key starts
            if not c.isspace():
                current_key = c
                state = SCAN_KEY
        elif state == SCAN_KEY:
            if c.isspace():
                # the key terminated early with whitespace
                # this is valid and means it is a boolean attribute
                attributes[current_key] = True
                current_key = ''
                state = SCAN_ATTRIBUTES
            elif c == '=':
                state = SCAN_VALUE
            else:
                current_key += c
        else:  # SCAN_VALUE
            if quote:
                terminated = (c == '"')
            elif c == '"':
                quote = True
                continue
            else:
                terminated = c.isspace()

            if terminated:
                attributes[current_key] = current_value
                current_key = ''
                current_value = ''
                # the next value starts unquoted again
                quote = False
                state = SCAN_ATTRIBUTES
            else:
                current_value += c

    if state == SCAN_KEY:
        # the line terminated in the middle of scanning a key
        # that means it's a boolean attribute
        attributes[current_key] = True
    elif state == SCAN_VALUE:
        # the line terminated in the middle of scanning a value
        # keep what was collected instead of dropping the attribute
        attributes[current_key] = current_value

    return attributes
|
|
|
|
def parse_symbol_entry(line):
    """Parse one line of a symbol file into an entry dict.

    A line has the form ``<label> <address> <kind> [attributes][; comment]``.

    The line terminator is stripped first: lines read from a file end in a
    newline, which used to end up inside the ``kind`` field (so an entry
    without attributes never matched ``'f'`` or ``'l'``) or inside the last
    attribute or comment.

    Returns None when the line has fewer than three space-separated fields,
    otherwise a dict with the keys ``'label'``, ``'address'``, ``'kind'``,
    ``'comment'`` (text after the first ';', ``''`` if absent) and
    ``'attributes'`` (result of ``parse_attributes``, ``{}`` if absent).
    """
    pieces = line.rstrip('\r\n').split(' ', 3)
    if len(pieces) < 3:
        return None

    entry = {
        'label': pieces[0],
        'address': pieces[1],
        'kind': pieces[2],
        'comment': '',
        'attributes': {},
    }

    if len(pieces) > 3:
        # everything after the kind: attributes, then an optional ';' comment
        attributes, separator, comment = pieces[3].partition(';')
        if separator:
            entry['comment'] = comment.lstrip(' ')
        entry['attributes'] = parse_attributes(attributes.rstrip(' '))

    return entry
|
|
|
|
def parse_datatype_string(dt_string):
    """Parse ``dt_string`` into a data type using the current program.

    The program's data type manager is used as both the source and the
    destination manager of the parser; no data type manager service is
    passed.

    Returns the parsed data type. When parsing raises, the error is printed
    and None is returned.
    """
    type_manager = currentProgram.getDataTypeManager()
    type_parser = DataTypeParser(type_manager, type_manager, None, DataTypeParser.AllowedDataTypes.ALL)

    try:
        result = type_parser.parse(dt_string)
        print("Successfully parsed: {} as {}".format(dt_string, result.getName()))
    except Exception as e:
        print("Error parsing '{}': {}".format(dt_string, e))
        return None
    return result
|
|
|
|
# Function manager of the current program; apply_function_symbol uses it to
# look up existing functions by address.
functionManager = currentProgram.getFunctionManager()

# Ask the user for the symbol file to import.
f = askFile("Navigate to the Binana all.sym file", "Go")
|
|
|
|
def apply_function_symbol(entry):
    """Name the function at the entry's address and apply its signature.

    ``entry`` is a dict as produced by ``parse_symbol_entry``. The function
    at ``entry['address']`` is renamed to ``entry['label']``, or created if
    none exists. If the entry has a ``type`` attribute, it is parsed and
    applied as the function signature; when the type carries ``@<register>``
    annotations or is ``__thiscall``, the function is switched to custom
    variable storage and each parameter is assigned a register or a stack
    slot.

    Fixes over the previous implementation:

    * a failed ``createFunction`` (None result) is reported instead of being
      dereferenced;
    * the ``__thiscall`` path no longer indexes an empty parameter list or
      installs an unparsable this-pointer type;
    * stack offsets are advanced by the parameter size rounded *up* to the
      stack alignment (the old formula produced e.g. 1 for a 1-byte and 10
      for a 6-byte parameter with 4-byte alignment).
    """
    name = entry['label']
    address = toAddr(entry['address'])

    func = functionManager.getFunctionAt(address)
    if func is not None:
        old_name = func.getName()
        func.setName(name, SourceType.USER_DEFINED)
        print("Renamed function {} to {} at address {}".format(old_name, name, address))
    else:
        func = createFunction(address, name)
        if func is None:
            # nothing to rename or retype; skip this entry instead of crashing
            print("Failed to create function {} at address {}".format(name, address))
            return
        print("Created function {} at address {}".format(name, address))

    func_type = entry['attributes'].get('type')
    if func_type is None:
        return

    calling_convention, this_ptr_type, stripped_func_type, parameter_storage = strip_function_type(func_type)

    # We pass 'None' for the DataTypeManagerService as it's not strictly required here
    parser = FunctionSignatureParser(currentProgram.getDataTypeManager(), None)

    print('applying signature: {}'.format(stripped_func_type))
    func_signature = parser.parse(None, stripped_func_type)

    # __usercall is applied as __stdcall; the register placement it implies
    # is handled by the custom storage pass below.
    if calling_convention == '__usercall':
        calling_convention = '__stdcall'

    # apply this information to the function
    func_signature.setCallingConvention(calling_convention)
    cmd = ApplyFunctionSignatureCmd(address, func_signature, SourceType.USER_DEFINED)

    if cmd.applyTo(currentProgram):
        print("Success! Applied signature '{}' to {}".format(entry['label'], entry['address']))
    else:
        print("Failed to apply signature. Reason: {}".format(cmd.getStatusMsg()))

    # if the function is a class method, the storage has to be modified
    # or if the function passes certain arguments by register, in violation of standard calling convention
    if len(parameter_storage) == 0 and calling_convention != '__thiscall':
        return

    # because we have to do this, everything is now manual
    func.setCustomVariableStorage(True)
    parameters = func.getParameters()

    # check if changing return storage is needed
    if parameter_storage.get(0) is not None:
        return_register_storage = currentProgram.getRegister(parameter_storage[0].upper())
        return_storage = VariableStorage(currentProgram, return_register_storage)
        func.setReturn(func_signature.getReturnType(), return_storage, SourceType.USER_DEFINED)

    # TODO: adjust this for different architectures
    # only __usercall and __stdcall can pass arguments through stack by default
    # NOTE(review): the initial offset of 4 presumably skips the return
    # address on 32-bit x86 -- confirm.
    stack_offset = 4
    stack_alignment = 4

    parameter_index = 0
    # fix this ptr storage
    if calling_convention == '__thiscall':
        this_ptr_datatype = parse_datatype_string(this_ptr_type)
        # TODO: fix for other architectures
        # Keep the storage already assigned to the this pointer and only
        # replace its type; skip the replacement when the type could not be
        # parsed or the function ended up with no parameters.
        if this_ptr_datatype is not None and len(parameters) > 0:
            parameters[0] = ParameterImpl(parameters[0].getName(), this_ptr_datatype, parameters[0].getVariableStorage(), currentProgram)
        parameter_index = 1

    # fix storage of main parameters
    while parameter_index < len(parameters):
        parameter = parameters[parameter_index]
        parameter_data_type = parameter.getDataType()
        # parameter_storage is numbered from 1 (0 is the return value)
        register_name = parameter_storage.get(1 + parameter_index)

        if register_name is not None:
            # this parameter wants to be stored in a register
            parameter_register = currentProgram.getRegister(register_name.upper())
            parameter_variable_storage = VariableStorage(currentProgram, parameter_register)
        else:
            # this parameter wants to be passed in the stack
            parameter_size = parameter_data_type.getLength()
            parameter_variable_storage = VariableStorage(currentProgram, stack_offset, parameter_size)
            # advance by the size rounded up to the stack alignment
            stack_offset += ((parameter_size + stack_alignment - 1) // stack_alignment) * stack_alignment

        parameters[parameter_index] = ParameterImpl(parameter.getName(), parameter_data_type, parameter_variable_storage, currentProgram)
        parameter_index = parameter_index + 1

    func.replaceParameters(Function.FunctionUpdateType.CUSTOM_STORAGE, True, SourceType.USER_DEFINED, parameters)
|
|
|
|
def apply_data_symbol(entry):
    """Create a label for a data symbol and optionally apply its data type.

    ``entry`` is a dict as produced by ``parse_symbol_entry``. A primary
    label named ``entry['label']`` is created at ``entry['address']``. If the
    entry has a ``type`` attribute that parses successfully, existing
    listing content covering the type's length is cleared and the data type
    is applied at the address.

    Fix over the previous implementation: the data type's length was read
    before checking that parsing succeeded, so an unparsable type raised on
    None. Such entries now keep their label and skip the data type.
    """
    address = toAddr(entry['address'])

    # report only after the label has actually been created
    createLabel(address, entry['label'], True)
    print("Created label {} at address {}".format(entry['label'], entry['address']))

    type_name = entry['attributes'].get('type')
    if type_name is None:
        return

    data_type = parse_datatype_string(type_name)
    if data_type is None:
        # parse_datatype_string has already printed the reason
        return

    # remove existing defined data at address
    data_type_size = data_type.getLength()
    clearListing(address, address.add(data_type_size - 1))
    createData(address, data_type)
    print("applied data type {} to label {}".format(type_name, entry['label']))
|
|
|
|
|
|
# Import every entry of the selected symbol file: kind 'f' entries are
# applied as functions, kind 'l' entries as data labels; other kinds and
# unparsable lines are skipped.
# note, cannot use open(), since that is in GhidraScript
# The with-block closes the file when the import finishes or fails; the
# handle was previously never closed.
with file(f.absolutePath) as symbol_file:
    for line in symbol_file:
        entry = parse_symbol_entry(line)
        if entry is None:
            continue

        if entry['kind'] == 'f':
            apply_function_symbol(entry)
        elif entry['kind'] == 'l':
            apply_data_symbol(entry)
|