shithub: pokecrystal

Download patch

ref: 10a5a6518d709c92df04ce5938d5fedf2c6df685
parent: 3233cead330abefa6d9d266f5dd5de5b58622901
author: Bryan Bishop <[email protected]>
date: Sun Jun 10 08:24:33 EDT 2012

somewhat improved disassembler

--- a/extras/gbz80disasm.py
+++ b/extras/gbz80disasm.py
@@ -6,15 +6,18 @@
 from ctypes import c_int8
 import json
 import random
-from romstr import RomStr
 
 spacing = "\t"
 
+class XRomStr(str):
+    def __repr__(self):
+        return "RomStr(too long)"
+
 def load_rom(filename="../baserom.gbc"):
     """loads bytes into memory"""
     global rom
     file_handler = open(filename, "r") 
-    rom = RomStr(file_handler.read())
+    rom = XRomStr(file_handler.read())
     file_handler.close()
     return rom
 
--- a/extras/romstr.py
+++ b/extras/romstr.py
@@ -147,12 +147,10 @@
 
         # [{"command": 0x20, "bytes": [0x20, 0x40, 0x50],
         # "asm": "jp $5040", "label": "Unknown5040"}]
-        asm_commands = []
+        asm_commands = {}
 
         offset = start_address
         
-        current_byte_number = 0
-        
         last_hl_address = None
         last_a_address  = None
         used_3d97       = False
@@ -159,36 +157,45 @@
 
         keep_reading    = True
 
-        # for labeling future bytes (like for relative jumps)
-        byte_labels = {}
-
         while offset <= end_address and keep_reading:
+            # read the current opcode byte
             current_byte = ord(rom[offset])
-
-            is_data = False
+            current_byte_number = len(asm_commands.keys())
             
-            maybe_byte = current_byte
+            # setup this next/upcoming command
+            asm_command = {
+                "address": offset,
 
-            # check if this byte has a label prior to it
-            # and if not, generate a new label
-            # This new label might not be used, so it will be
-            # removed if the total usage is zero.
-            if offset in byte_labels.keys():
-                line_label = byte_labels[offset]["name"]
-                byte_labels[offset]["usage"] += 1
-            else:
-                line_label = asm_label(offset)
-                byte_labels[offset] = {}
-                byte_labels[offset]["name"] = line_label
-                byte_labels[offset]["usage"] = 0 
-            byte_labels[offset]["definition"] = True
+                # This counts how many times relative jumps reference this
+                # byte. This is used to determine whether or not to print out a
+                # label later.
+                "references": 0,
+            }
 
-            #find out if there's a two byte key like this
-            temp_maybe = maybe_byte
-            temp_maybe += ( ord(rom[offset+1]) << 8)
-            if temp_maybe in opt_table.keys() and ord(rom[offset+1])!=0:
-                opstr = opt_table[temp_maybe][0].lower()
-    
+            # some opcodes are two bytes long, so also read the following byte
+            next_byte = ord(rom[offset+1])
+
+            # the first byte of every two-byte opcode is also a key in opt_table
+            if current_byte in opt_table.keys():
+                # this might be a two-byte opcode
+                possible_opcode = current_byte + (next_byte << 8)
+
+                # check if this is a two-byte opcode
+                if possible_opcode in opt_table.keys():
+                    op_code = possible_opcode
+                else:
+                    op_code = current_byte
+                
+                op = opt_table[op_code]
+
+                opstr = op[0].lower()
+                optype = op[1]
+
+                asm_command["type"] = "op"
+                asm_command["id"] = op_code
+                asm_command["format"] = opstr
+                asm_command["opnumberthing"] = optype
+                
                 if "x" in opstr:
                     for x in range(0, opstr.count("x")):
                         insertion = ord(rom[offset + 1])
@@ -196,8 +203,8 @@
     
                         opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
     
-                        current_byte += 1
-                        offset += 1
+                        current_byte_number += 1
+                        offset += 1 
 
                 if "?" in opstr:
                     for y in range(0, opstr.count("?")):
@@ -213,145 +220,69 @@
     
                         current_byte_number += 2
                         offset += 2
-   
-                        asm_commands.append({"address": offset, "command": opstr})
-            output += spacing + opstr #+ " ; " + hex(offset)
-            output += "\n"
-    
-                current_byte_number += 2
-                offset += 2
-            elif maybe_byte in opt_table.keys():
-                op_code = opt_table[maybe_byte]
-                op_code_type = op_code[1]
-                op_code_byte = maybe_byte
-    
-                #type = -1 when it's the E op
-                #if op_code_type != -1:
-                if   op_code_type == 0 and ord(rom[offset]) == op_code_byte:
-                    op_str = op_code[0].lower()
-    
-                    output += spacing + op_code[0].lower() #+ " ; " + hex(offset)
-                    output += "\n"
-    
-                    offset += 1
-                    current_byte_number += 1
-                elif op_code_type == 1 and ord(rom[offset]) == op_code_byte:
-                    oplen = len(op_code[0])
-                    opstr = copy(op_code[0])
-                    xes = op_code[0].count("x")
-                    include_comment = False
-                    for x in range(0, xes):
-                        insertion = ord(rom[offset + 1])
-                        insertion = "$" + hex(insertion)[2:]
-    
-                        if current_byte == 0x18 or current_byte==0x20 or current_byte in relative_jumps: #jr or jr nz
-                            #generate a label for the byte we're jumping to
-                            target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value
-                            if target_address in byte_labels.keys():
-                                byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"]
-                                line_label2 = byte_labels[target_address]["name"]
-                            else:
-                                line_label2 = asm_label(target_address)
-                                byte_labels[target_address] = {}
-                                byte_labels[target_address]["name"] = line_label2
-                                byte_labels[target_address]["usage"] = 1
-                                byte_labels[target_address]["definition"] = False
-    
-                            insertion = line_label2.lower()
-                            include_comment = True
-                        elif current_byte == 0x3e:
-                            last_a_address = ord(rom[offset + 1])
-    
-                        opstr = opstr[:opstr.find("x")].lower() + insertion + opstr[opstr.find("x")+1:].lower()
-                        output += spacing + opstr
-                        if include_comment:
-                            output += " ; " + hex(offset)
-                            if current_byte in relative_jumps:
-                                output += " $" + hex(ord(rom[offset + 1]))[2:]
-                        output += "\n"
-    
-                        current_byte_number += 1
-                        offset += 1
-                        insertion = ""
-    
-                    current_byte_number += 1
-                    offset += 1
-                    include_comment = False
-    
-                elif op_code_type == 2 and ord(rom[offset]) == op_code_byte:
-                    oplen = len(op_code[0])
-                    opstr = copy(op_code[0])
-                    qes = op_code[0].count("?")
-                    for x in range(0, qes):
-                        byte1 = ord(rom[offset + 1])
-                        byte2 = ord(rom[offset + 2])
-    
-                        number = byte1
-                        number += byte2 << 8;
-    
-                        insertion = "$%.4x" % (number)
-                        if maybe_byte in call_commands or current_byte in relative_unconditional_jumps or current_byte in relative_jumps:
-                            result = find_label(insertion, bank_id)
-                            if result != None:
-                                insertion = result
-    
-                        opstr = opstr[:opstr.find("?")].lower() + insertion + opstr[opstr.find("?")+1:].lower()
-                        output += spacing + opstr #+ " ; " + hex(offset)
-                        output += "\n"
-    
-                        current_byte_number += 2
-                        offset += 2
-    
-                    current_byte_number += 1
-                    offset += 1
-    
-                    if current_byte == 0x21:
-                        last_hl_address = byte1 + (byte2 << 8)
-                    if current_byte == 0xcd:
-                        if number == 0x3d97: used_3d97 = True
-                    #duck out if this is jp $24d7
-                    if current_byte == 0xc3 or current_byte in relative_unconditional_jumps:
-                        if current_byte == 0xc3:
-                            if number == 0x3d97: used_3d97 = True
-                        #if number == 0x24d7: #jp
-                        if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
-                            keep_reading = False
-                            is_data = False
-                            break
-                else:
-                    is_data = True
-    
-                #stop reading at a jump, relative jump or return
+
+                # Check for relative jumps, construct the formatted asm line.
+                # Also set the usage of labels.
+                if current_byte in [0x18, 0x20] or current_byte in relative_jumps: # jr or jr nz
+                    # generate a label for the byte we're jumping to
+                    target_address = offset + 2 + c_int8(ord(rom[offset + 1])).value
+
+                    if target_address in byte_labels.keys():
+                        byte_labels[target_address]["usage"] = 1 + byte_labels[target_address]["usage"]
+                        line_label2 = byte_labels[target_address]["name"]
+
+                    else:
+                        line_label2 = asm_label(target_address)
+                        byte_labels[target_address] = {}
+                        byte_labels[target_address]["name"] = line_label2
+                        byte_labels[target_address]["usage"] = 1
+                        byte_labels[target_address]["definition"] = False
+
+                    insertion = line_label2.lower()
+                    include_comment = True
+                elif current_byte == 0x3e:
+                    last_a_address = ord(rom[offset + 1])
+
+                # store the formatted string for the output later
+                asm_command["formatted"] = opstr
+
+                if current_byte == 0x21:
+                    last_hl_address = byte1 + (byte2 << 8)
+
+                # this is leftover from pokered, might be meaningless
+                if current_byte == 0xcd:
+                    if number == 0x3d97:
+                        used_3d97 = True
+                
+                if current_byte == 0xc3 or current_byte in relative_unconditional_jumps:
+                    if current_byte == 0xc3:
+                        if number == 0x3d97:
+                            used_3d97 = True
+
+                    if not has_outstanding_labels(byte_labels) or all_outstanding_labels_are_reverse(byte_labels, offset):
+                        keep_reading = False
+                        break
+
+                # stop reading at a jump, relative jump or return
                 if current_byte in end_08_scripts_with:
+                    is_data = False
+
                     if not has_outstanding_labels(byte_labels) and all_outstanding_labels_are_reverse(byte_labels, offset):
                         keep_reading = False
-                        is_data = False #cleanup
                         break
                     else:
-                        is_data = False
                         keep_reading = True
                 else:
-                    is_data = False
                     keep_reading = True
+
             else:
-            #if is_data and keep_reading:
-                output += spacing + "db $" + hex(ord(rom[offset]))[2:] #+ " ; " + hex(offset)
-                output += "\n"
-                offset += 1
-                current_byte_number += 1
-            #else the while loop would have spit out the opcode
-    
-            #these two are done prior
-            #offset += 1
-            #current_byte_number += 1
-    
-        # clean up unused labels.. used to be in 'output', but is now in asm_commands
-        for label_line in byte_labels.keys():
-            address = label_line
-            label_line = byte_labels[label_line]
-            if label_line["usage"] == 0:
-                output = output.replace((label_line["name"] + "\n").lower(), "")
-                raise NotImplementedError   
+                # This shouldn't really happen, and means that this area of the
+                # ROM probably doesn't represent instructions.
+                asm_command["type"] = "data" # db
+                asm_command["value"] = current_byte
+            
+            # save this new command in the asm_commands dict, keyed by current_byte_number
+            asm_commands[current_byte_number] = asm_command
 
     def __str__(self):
         """ ASM pretty printer.