ref: 10a5a6518d709c92df04ce5938d5fedf2c6df685
parent: 3233cead330abefa6d9d266f5dd5de5b58622901
author: Bryan Bishop <[email protected]>
date: Sun Jun 10 08:24:33 EDT 2012
somewhat improved disassembler
--- a/extras/gbz80disasm.py
+++ b/extras/gbz80disasm.py
@@ -6,15 +6,18 @@
from ctypes import c_int8
import json
import random
-from romstr import RomStr
spacing = "\t"
+class XRomStr(str):
+ def __repr__(self):
+ return "RomStr(too long)"
+
def load_rom(filename="../baserom.gbc"):
"""loads bytes into memory"""
global rom
file_handler = open(filename, "r")
- rom = RomStr(file_handler.read())
+ rom = XRomStr(file_handler.read())
file_handler.close()
return rom
--- a/extras/romstr.py
+++ b/extras/romstr.py
@@ -147,12 +147,10 @@
# [{"command": 0x20, "bytes": [0x20, 0x40, 0x50],
# "asm": "jp $5040", "label": "Unknown5040"}]
- asm_commands = []
+ asm_commands = {}
offset = start_address
- current_byte_number = 0
-
last_hl_address = None
last_a_address = None
used_3d97 = False
@@ -159,36 +157,45 @@
keep_reading = True
- # for labeling future bytes (like for relative jumps)
- byte_labels = {}
-
while offset <= end_address and keep_reading:
+ # read the current opcode byte
current_byte = ord(rom[offset])
-
- is_data = False
+ current_byte_number = len(asm_commands.keys())
- maybe_byte = current_byte
+ # setup this next/upcoming command
+ asm_command = {
+ "address": offset,
- # check if this byte has a label prior to it
- # and if not, generate a new label
- # This new label might not be used, so it will be
- # removed if the total usage is zero.
- if offset in byte_labels.keys():
- line_label = byte_labels[offset]["name"]
- byte_labels[offset]["usage"] += 1
- else:
- line_label = asm_label(offset)
- byte_labels[offset] = {}
- byte_labels[offset]["name"] = line_label
- byte_labels[offset]["usage"] = 0
- byte_labels[offset]["definition"] = True
+ # This counts how many times relative jumps reference this
+ # byte. This is used to determine whether or not to print out a
+ # label later.
+ "references": 0,
+ }
- #find out if there's a two byte key like this
- temp_maybe = maybe_byte
- temp_maybe += ( ord(rom[offset+1]) << 8)
- if temp_maybe in opt_table.keys() and ord(rom[offset+1])!=0:
- opstr = opt_table[temp_maybe][0].lower()
-
+ # some opcodes are two bytes long, so peek at the following byte
+ next_byte = ord(rom[offset+1])
+
+ # the first byte of every two-byte opcode is also a key in opt_table
+ if current_byte in opt_table.keys():
+ # this might be a two-byte opcode
+ possible_opcode = current_byte + (next_byte << 8)
+
+ # check if this is a two-byte opcode
+ if possible_opcode in opt_table.keys():
+ op_code = possible_opcode
+ else:
+ op_code = current_byte
+
+ op = opt_table[op_code]
+
+ opstr = op[0].lower()
+ optype = op[1]
+
+ asm_command["type"] = "op"
+ asm_command["id"] = op_code
+ asm_command["format"] = opstr
+ asm_command["opnumberthing"] = optype
+
if "x" in opstr:
for x in range(0, opstr.count("x")):
insertion = ord(rom[offset + 1])
@@ -196,8 +203,8 @@
opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
- current_byte += 1
- offset += 1
+ current_byte_number += 1
+ offset += 1
if "?" in opstr:
for y in range(0, opstr.count("?")):
@@ -213,145 +220,69 @@
current_byte_number += 2
offset += 2
-
- asm_commands.append({"address": offset, "command": opstr})
- output += spacing + opstr #+ " ; " + hex(offset)
- output += "\n"
-
- current_byte_number += 2
- offset += 2
- elif maybe_byte in opt_table.keys():
- op_code = opt_table[maybe_byte]
- op_code_type = op_code[1]
- op_code_byte = maybe_byte
-
- #type = -1 when it's the E op
- #if op_code_type != -1:
- if op_code_type == 0 and ord(rom[offset]) == op_code_byte:
- op_str = op_code[0].lower()
-
- output += spacing + op_code[0].lower() #+ " ; " + hex(offset)
- output += "\n"
-
- offset += 1
- current_byte_number += 1
- elif op_code_type == 1 and ord(rom[offset]) == op_code_byte:
- oplen = len(op_code[0])
- opstr = copy(op_code[0])
- xes = op_code[0].count("x")
- include_comment = False
- for x in range(0, xes):
- insertion = ord(rom[offset + 1])
- insertion = "$" + hex(insertion)[2:]
-
- if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz
- #generate a label for the byte we're jumping to
- target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value
- if target_address in byte_labels.keys():
- byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"]
- line_label2 = byte_labels[target_address]["name"]
- else:
- line_label2 = asm_label(target_address)
- byte_labels[target_address] = {}
- byte_labels[target_address]["name"] = line_label2
- byte_labels[target_address]["usage"] = 1
- byte_labels[target_address]["definition"] = False
-
- insertion = line_label2.lower()
- include_comment = True
- elif current_byte == 0x3e:
- last_a_address = ord(rom[offset + 1])
-
- opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
- output += spacing + opstr
- if include_comment:
- output += " ; " + hex(offset)
- if current_byte in relative_jumps:
- output += " $" + hex(ord(rom[offset + 1]))[2:]
- output += "\n"
-
- current_byte_number += 1
- offset += 1
- insertion = ""
-
- current_byte_number += 1
- offset += 1
- include_comment = False
-
- elif op_code_type == 2 and ord(rom[offset]) == op_code_byte:
- oplen = len(op_code[0])
- opstr = copy(op_code[0])
- qes = op_code[0].count("?")
- for x in range(0, qes):
- byte1 = ord(rom[offset + 1])
- byte2 = ord(rom[offset + 2])
-
- number = byte1
- number += byte2 << 8;
-
- insertion = "$%.4x" % (number)
- if maybe_byte in call_commands or current_byte in relative_unconditional_jumps or current_byte in relative_jumps:
- result = find_label(insertion, bank_id)
- if result != None:
- insertion = result
-
- opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower()
- output += spacing + opstr #+ " ; " + hex(offset)
- output += "\n"
-
- current_byte_number += 2
- offset += 2
-
- current_byte_number += 1
- offset += 1
-
- if current_byte == 0x21:
- last_hl_address = byte1 + (byte2 << 8)
- if current_byte == 0xcd:
- if number == 0x3d97: used_3d97 = True
- #duck out if this is jp $24d7
- if current_byte == 0xc3 or current_byte in relative_unconditional_jumps:
- if current_byte == 0xc3:
- if number == 0x3d97: used_3d97 = True
- #if number == 0x24d7: #jp
- if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
- keep_reading = False
- is_data = False
- break
- else:
- is_data = True
-
- #stop reading at a jump, relative jump or return
+
+ # Check for relative jumps, construct the formatted asm line.
+ # Also set the usage of labels.
+ if current_byte in [0x18, 0x20] or current_byte in relative_jumps: # jr or jr nz
+ # generate a label for the byte we're jumping to
+ target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value
+
+ if target_address in byte_labels.keys():
+ byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"]
+ line_label2 = byte_labels[target_address]["name"]
+
+ else:
+ line_label2 = asm_label(target_address)
+ byte_labels[target_address] = {}
+ byte_labels[target_address]["name"] = line_label2
+ byte_labels[target_address]["usage"] = 1
+ byte_labels[target_address]["definition"] = False
+
+ insertion = line_label2.lower()
+ include_comment = True
+ elif current_byte == 0x3e:
+ last_a_address = ord(rom[offset + 1])
+
+ # store the formatted string for the output later
+ asm_command["formatted"] = opstr
+
+ if current_byte == 0x21:
+ last_hl_address = byte1 + (byte2 << 8)
+
+ # this is leftover from pokered, might be meaningless
+ if current_byte == 0xcd:
+ if number == 0x3d97:
+ used_3d97 = True
+
+ if current_byte == 0xc3 or current_byte in relative_unconditional_jumps:
+ if current_byte == 0xc3:
+ if number == 0x3d97:
+ used_3d97 = True
+
+ if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
+ keep_reading = False
+ break
+
+ # stop reading at a jump, relative jump or return
if current_byte in end_08_scripts_with:
+ is_data = False
+
if not has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset):
keep_reading = False
- is_data = False #cleanup
break
else:
- is_data = False
keep_reading = True
else:
- is_data = False
keep_reading = True
+
else:
- #if is_data and keep_reading:
- output += spacing + "db $" + hex(ord(rom[offset]))[2:] #+ " ; " + hex(offset)
- output += "\n"
- offset += 1
- current_byte_number += 1
- #else the while loop would have spit out the opcode
-
- #these two are done prior
- #offset += 1
- #current_byte_number += 1
-
- # clean up unused labels.. used to be in 'output', but is now in asm_commands
- for label_line in byte_labels.keys():
- address = label_line
- label_line = byte_labels[label_line]
- if label_line["usage"] == 0:
- output = output.replace((label_line["name"] + "\n").lower(), "")
- raise NotImplementedError
+ # This shouldn't really happen, and means that this area of the
+ # ROM probably doesn't represent instructions.
+ asm_command["type"] = "data" # db
+ asm_command["value"] = current_byte
+
+ # save this new command in the dict, keyed by current_byte_number
+ asm_commands[current_byte_number] = asm_command
def __str__(self):
""" ASM pretty printer.