From 105a314f749bc4a0662851fbcacbeb778c7ae19e Mon Sep 17 00:00:00 2001 From: superp00t Date: Sun, 12 Apr 2026 11:37:42 -0400 Subject: [PATCH] feat(ghidra): add SuperImportSymbolsScript.py --- ghidra/SuperImportSymbolsScript.py | 375 +++++++++++++++++++++++++++++ 1 file changed, 375 insertions(+) create mode 100644 ghidra/SuperImportSymbolsScript.py diff --git a/ghidra/SuperImportSymbolsScript.py b/ghidra/SuperImportSymbolsScript.py new file mode 100644 index 0000000..c1c6582 --- /dev/null +++ b/ghidra/SuperImportSymbolsScript.py @@ -0,0 +1,375 @@ +#Extended Binana symbols importer script +# @runtime Jython +# @category Binana +# @author Thunderbrew +# @menupath +# @toolbar logo.png + +import re +import string + +from ghidra.app.cmd.function import ApplyFunctionSignatureCmd +from ghidra.app.util.parser import FunctionSignatureParser +from ghidra.util.data import DataTypeParser +from ghidra.program.model.symbol import SourceType +from ghidra.program.model.listing import Function, ParameterImpl, VariableStorage + +def find_storage_parameter(str): + match = re.search(r'@<(\w+)?>', str) + + if match: + return match.group(1) + return None + +def strip_storage_parameter(str): + pattern = r'@<[^>]+>' + + # Use re.sub to replace the match with an empty string + return re.sub(pattern, '', str) + +def split_function_parameters(str): + SCAN_FUNC = 1 + SCAN_PARAMETER = 2 + SCAN_ESCAPE_PAREN = 3 + + current_word = '' + parameters = [] + state = SCAN_FUNC + paren_level = 0 + i = 0 + while i < len(str): + c = str[i] + + if state == SCAN_FUNC: + if c == '(': + state = SCAN_PARAMETER + elif state == SCAN_PARAMETER: + if c == '(': + state = SCAN_ESCAPE_PAREN + paren_level = 1 + current_word = current_word + c + elif c == ',': + parameters.append(current_word.strip()) + current_word = '' + elif c == ')': + parameters.append(current_word.strip()) + current_word = '' + break + else: + current_word = current_word + c + elif state == SCAN_ESCAPE_PAREN: + current_word = current_word + c + if c == '(': + paren_level = paren_level + 1 + elif c == ')': + paren_level = paren_level - 1 + + if paren_level == 0: + state = SCAN_PARAMETER + + i = i + 1 + + if current_word != '': + parameters.append(current_word.strip()) + return parameters + +# return: , +def strip_function_type(str): + # str = int32_t __stdcall func@(int32_t x@) + + parameter_storage = {} + + this_ptr_type = '' + + # [ 'int32_t', '__stdcall', 'func@(int32_t x@)' ] + str_parts = str.split(' ') + # int32_t + return_type = str_parts[0] + + # default + call_conv = '__stdcall' + + i = 0 + last_spec = -1 + func_start = -1 + while i < len(str_parts): + if str_parts[i].startswith('__'): + if str_parts[i].endswith('call'): + call_conv = str_parts[i] + last_spec = i + elif str_parts[i].startswith('func'): + func_start = i + break + i = i + 1 + + function_call = ' '.join(str_parts[func_start:]) + + # [ 'func@', 'int32_t x@)' ] + func_before_after_paren = function_call.split('(', 1) + # func@ + func_id = func_before_after_paren[0] + # [ 'int32_t x@' ] + func_parameters = split_function_parameters(function_call) + + return_parameter_storage = find_storage_parameter(func_id) + if return_parameter_storage is not None: + parameter_storage[0] = return_parameter_storage + func_id = strip_storage_parameter(func_id) + + # start building stripped function type + stripped_type = return_type + stripped_type += " " + stripped_type += func_id + + stripped_type += '(' + + n = 1 + first = True + for argument_parameter in func_parameters: + if '__return_ptr' in argument_parameter: + argument_parameter = argument_parameter.replace('__return_ptr ', '') + + if n == 1 and call_conv == '__thiscall': + # in Ghidra, a this pointer is always added to the signature + # we need only record the the type for later + this_ptr_type = argument_parameter.split(' ', 1)[0] + # n = n + 1 + # continue + # + # commented out: let's see if this fixed it + + if not first: + stripped_type += ', ' + else: + first = False + argument_parameter_storage = find_storage_parameter(argument_parameter) + if argument_parameter_storage is not None: + parameter_storage[n] = argument_parameter_storage + argument_parameter = strip_storage_parameter(argument_parameter) + stripped_type += argument_parameter + n = n + 1 + stripped_type += ')' + # todo strip parameters + return call_conv, this_ptr_type, stripped_type, parameter_storage + +def parse_attributes(str): + attributes = {} + current_key = '' + current_value = '' + + SCAN_ATTRIBUTES = 0 + SCAN_KEY = 1 + SCAN_VALUE = 2 + + i = 0 + state = SCAN_ATTRIBUTES + quote = False + while i < len(str): + c = str[i] + i = i + 1 + if state == SCAN_ATTRIBUTES: + if c != ' ': + current_key = c + state = SCAN_KEY + elif state == SCAN_KEY: + if c == ' ': + # the key terminated early with a space + # this is valid and means it is a boolean attribute + state = SCAN_ATTRIBUTES + attributes[current_key] = True + current_key = '' + elif c == '=': + state = SCAN_VALUE + else: + current_key = current_key + c + elif state == SCAN_VALUE: + if quote: + if c == '"': + attributes[current_key] = current_value + current_key = '' + current_value = '' + state = SCAN_ATTRIBUTES + else: + current_value = current_value + c + else: + if c == '"': + quote = True + elif c == ' ': + attributes[current_key] = current_value + current_key = '' + current_value = '' + state = SCAN_ATTRIBUTES + else: + current_value = current_value + c + # the line terminated in the middle of scanning a key + # that means it's a boolean attribute + if state == SCAN_KEY: + attributes[current_key] = True + + return attributes + +def parse_symbol_entry(line): + pieces = line.split(' ', 3) + print(len(pieces)) + if len(pieces) < 3: + return None + + entry = {} + entry['label'] = pieces[0] + entry['address'] = pieces[1] + entry['kind'] = pieces[2] + entry['comment'] = '' + entry['attributes'] = {} + + if len(pieces) > 3: + et_cetera = pieces[3] + attributes = et_cetera + index_of_comment_separator = et_cetera.find(';') + if index_of_comment_separator != -1: + entry['comment'] = et_cetera[index_of_comment_separator+1:].lstrip(' ') + attributes = et_cetera[:index_of_comment_separator] + attributes = attributes.rstrip(' ') + entry['attributes'] = parse_attributes(attributes) + + return entry + +def parse_datatype_string(dt_string): + # Get the current program's data type manager + dtm = currentProgram.getDataTypeManager() + + # Initialize the parser using the program's context + # Allowed forms: HIDDEN, READ_ONLY, or FULL (updates DTM) + parser = DataTypeParser(dtm, dtm, None, DataTypeParser.AllowedDataTypes.ALL) + + try: + # Parse the string into a DataType object + parsed_dt = parser.parse(dt_string) + print("Successfully parsed: {} as {}".format(dt_string, parsed_dt.getName())) + return parsed_dt + except Exception as e: + print("Error parsing '{}': {}".format(dt_string, e)) + return None + +functionManager = currentProgram.getFunctionManager() + +f = askFile("Navigate to the Binana all.sym file", "Go") + +def apply_function_symbol(entry): + name = entry['label'] + address = toAddr(entry['address']) + + func = functionManager.getFunctionAt(address) + + if func is not None: + old_name = func.getName() + func.setName(name, SourceType.USER_DEFINED) + print("Renamed function {} to {} at address {}".format(old_name, name, address)) + else: + func = createFunction(address, name) + print("Created function {} at address {}".format(name, address)) + + func_type = entry['attributes'].get('type') + if func_type is not None: + calling_convention, this_ptr_type, stripped_func_type, parameter_storage = strip_function_type(func_type) + + # 4. Initialize the parser + # We pass 'None' for the DataTypeManagerService as it's not strictly required here + parser = FunctionSignatureParser(currentProgram.getDataTypeManager(), None) + + print('applying signature: {}'.format(stripped_func_type)) + + func_signature = parser.parse(None, stripped_func_type) + if calling_convention == '__usercall': + calling_convention = '__stdcall' + + # if calling_convention == '__thiscall': + # this_ptr_datatype = findDataType(this_ptr_type) + # func_signature.replaceArgument(0, 'this', this_ptr_datatype, '', SourceType.USER_DEFINED) + + # apply this information to the function + func_signature.setCallingConvention(calling_convention) + cmd = ApplyFunctionSignatureCmd(address, func_signature, SourceType.USER_DEFINED) + + if cmd.applyTo(currentProgram): + print("Success! Applied signature '{}' to {}".format(entry['label'], entry['address'])) + else: + print("Failed to apply signature. Reason: {}".format(cmd.getStatusMsg())) + + # if the function is a class method, the storage has to be modified + # or if the function passes certain arguments by register, in violation of standard calling convention + if len(parameter_storage) != 0 or calling_convention == '__thiscall': + # because we have to do this, everything is now manual + func.setCustomVariableStorage(True) + + parameters = func.getParameters() + # check if changing return storage is needed + if parameter_storage.get(0) is not None: + return_register_storage = currentProgram.getRegister(parameter_storage[0].upper()) + return_storage = VariableStorage(currentProgram, return_register_storage) + func.setReturn(func_signature.getReturnType(), return_storage, SourceType.USER_DEFINED) + + # TODO: adjust this for different architectures + # only __usercall and __stdcall can pass arguments through stack by default + stack_offset = 4 + stack_alignment = 4 + + parameter_index = 0 + # fix this ptr storage + if calling_convention == '__thiscall': + this_ptr_datatype = parse_datatype_string(this_ptr_type) + # TODO: fix for other architectures + # this_parameter_register = currentProgram.getRegister('ECX') + # this_parameter_storage = VariableStorage(currentProgram, this_parameter_register) + # parameters[0] = ParameterImpl(parameters[0].getName(), this_ptr_datatype, this_parameter_storage, currentProgram) + parameters[0] = ParameterImpl(parameters[0].getName(), this_ptr_datatype, parameters[0].getVariableStorage(), currentProgram) + parameter_index = 1 + # fix storage of main parameters + while parameter_index < len(parameters): + if parameter_storage.get(1+parameter_index) is not None: + # this parameter wants to be stored in a register + parameter_register = currentProgram.getRegister(parameter_storage[1+parameter_index].upper()) + parameter_variable_storage = VariableStorage(currentProgram, parameter_register) + parameters[parameter_index] = ParameterImpl(parameters[parameter_index].getName(), parameters[parameter_index].getDataType(), parameter_variable_storage, currentProgram) + else: + # this parameter wants to be passed in the stack + parameter_data_type = parameters[parameter_index].getDataType() + parameter_size = parameter_data_type.getLength() + parameter_variable_storage = VariableStorage(currentProgram, stack_offset, parameter_size) + parameters[parameter_index] = ParameterImpl(parameters[parameter_index].getName(), parameters[parameter_index].getDataType(), parameter_variable_storage, currentProgram) + if parameter_size % stack_alignment != 0: + stack_offset = stack_offset + (parameter_size + (parameter_size - (parameter_size % stack_alignment))) + else: + stack_offset = stack_offset + parameter_size + parameter_index = parameter_index + 1 + + func.replaceParameters(Function.FunctionUpdateType.CUSTOM_STORAGE, True, SourceType.USER_DEFINED, parameters) + +def apply_data_symbol(entry): + address = toAddr(entry['address']) + + # st = currentProgram.getSymbolTable() + # existing_symbols = st.getSymbols(address) + # for symbol in existing_symbols: + # st.removeSymbolSpecial(symbol) + # print("Removed existing label: {}".format(symbol.getName())) + + print("Created label {} at address {}".format(entry['label'], entry['address'])) + createLabel(address, entry['label'], True) + + if entry['attributes'].get('type') is not None: + data_type = parse_datatype_string(entry['attributes']['type']) + data_type_size = data_type.getLength() + if data_type is not None: + # remove existing defined data at address + clearListing(address, address.add(data_type_size - 1)) + createData(address, data_type) + print("applied data type {} to label {}".format(entry['attributes']['type'], entry['label'])) + + +for line in file(f.absolutePath): # note, cannot use open(), since that is in GhidraScript + entry = parse_symbol_entry(line) + if entry is not None: + if entry['kind'] == 'f': + apply_function_symbol(entry) + elif entry['kind'] == 'l': + apply_data_symbol(entry)