#!/usr/bin/env python3
"""Generate ARM32 decoder tables and interpreter scaffolding.

The script reads ``INST(name, "mnemonic", "bitstring")`` entries from an
input file and writes five generated artifacts:

* an opcode enum header,
* a decoder lookup table (C source and header),
* an interpreter handler dispatch table, and
* a file of empty handler skeletons.
"""
import argparse
import re
import sys

# Increased bucket size to handle overlapping wildcards
MAX_BUCKET_SIZE = 64
TABLE_SIZE = 4096

# Bits [27:20] and [7:4]
HASH_BITS_MASK = 0x0FF000F0

# Matches one `INST(name, "mnemonic", "bitstring")` entry on a line.
_INST_RE = re.compile(r'INST\(\s*([A-Za-z0-9_]+),\s*"(.*?)",\s*"(.*?)"\s*\)')


class Instruction:
    """One instruction encoding parsed from the input file.

    Attributes:
        name: Identifier used to build the ``PVM_A32_OP_<NAME>`` enum name.
        mnemonic: Mnemonic string copied into the generated tables.
        bitstring: 32-character encoding pattern, most significant bit first.
        array_index: Position of this entry in the generated
            ``g_instructions`` array.
        mask: Bit set for every position whose pattern character is
            ``"0"`` or ``"1"`` (a fixed bit).
        expected: Bit set for every position whose pattern character is
            ``"1"``.
    """

    def __init__(self, name, mnemonic, bitstring, array_index):
        self.name = name
        self.mnemonic = mnemonic
        self.bitstring = bitstring
        self.array_index = array_index
        self.mask = 0
        self.expected = 0
        self.parse_bits()

    def parse_bits(self):
        """Derive ``mask`` and ``expected`` from ``bitstring``.

        Prints an error and exits the process with status 1 when the
        bitstring is not exactly 32 characters long.
        """
        if len(self.bitstring) != 32:
            print(
                f"Error: Bitstring length {len(self.bitstring)} invalid for {self.name}"
            )
            sys.exit(1)

        for i, char in enumerate(self.bitstring):
            # The first character of the pattern describes bit 31.
            bit_pos = 31 - i
            if char == "0":
                self.mask |= 1 << bit_pos
            elif char == "1":
                self.mask |= 1 << bit_pos
                self.expected |= 1 << bit_pos
            # Variable bits (c, n, d, m, etc) leave mask as 0


def parse_inc_file(input_path):
    """Parse ``INST(...)`` entries from *input_path*.

    Blank lines and lines starting with ``//`` are skipped, as are lines
    that do not contain an ``INST(...)`` entry.

    Args:
        input_path: Path of the instruction definition file.

    Returns:
        A list of ``Instruction`` objects in file order; each
        ``array_index`` equals the entry's position in the list.

    Exits with status 1 if the file does not exist.
    """
    try:
        with open(input_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        print(f"Error: Could not find input file: {input_path}")
        sys.exit(1)

    instructions = []
    for line in lines:
        line = line.strip()
        if not line or line.startswith("//"):
            continue
        match = _INST_RE.search(line)
        if match:
            # The next free list slot is the index in the generated array.
            instructions.append(
                Instruction(
                    match.group(1),
                    match.group(2),
                    match.group(3),
                    len(instructions),
                )
            )
    return instructions


def generate_lookup_table(instructions):
    """Assign every instruction to each hash bucket it can decode from.

    The hash index is ``(major << 4) | minor`` where ``major`` is bits
    [27:20] and ``minor`` is bits [7:4] of the instruction word. An
    instruction belongs to a bucket when its fixed bits, restricted to
    those hash positions, agree with the bits implied by the index.

    Args:
        instructions: Iterable of ``Instruction`` objects.

    Returns:
        A dict mapping each index in ``range(TABLE_SIZE)`` to the list of
        matching instructions, in input order.

    Exits with status 1 if any bucket grows beyond ``MAX_BUCKET_SIZE``.
    """
    # The hash-relevant part of each instruction does not depend on the
    # bucket index, so compute it once instead of once per bucket.
    candidates = [
        (inst, inst.mask & HASH_BITS_MASK, inst.expected & HASH_BITS_MASK)
        for inst in instructions
    ]

    buckets = {i: [] for i in range(TABLE_SIZE)}
    for i in range(TABLE_SIZE):
        # Reconstruct the 32-bit value that would generate this hash index.
        major_val = (i >> 4) & 0xFF
        minor_val = i & 0x0F
        probe_val = (major_val << 20) | (minor_val << 4)

        bucket = buckets[i]
        for inst, relevant_mask, relevant_expected in candidates:
            if (probe_val & relevant_mask) == relevant_expected:
                bucket.append(inst)
                if len(bucket) > MAX_BUCKET_SIZE:
                    print(
                        f"FATAL ERROR: Bucket {i:#05x} overflowed! Size: {len(bucket)}"
                    )
                    print(
                        "This means too many instructions map to the same hash index."
                    )
                    sys.exit(1)
    return buckets


def _unique_opcodes(instructions):
    """Yield ``(enum_name, instruction)`` for the first use of each opcode name."""
    seen = set()
    for inst in instructions:
        enum_name = f"PVM_A32_OP_{inst.name.upper()}"
        if enum_name not in seen:
            seen.add(enum_name)
            yield enum_name, inst


def write_decoder_table_h_file(path):
    """Write the C header declaring ``decode_bucket_t`` and the lookup table.

    Args:
        path: Output path of the generated header.
    """
    print(f"Generating decoder table header file: {path}")
    with open(path, "w", encoding="utf-8") as f:
        f.write("/* GENERATED FILE - DO NOT EDIT */\n")
        f.write("/* This file is generated by scripts/generate_jit_assets.py */\n")
        f.write("#ifndef POUND_JIT_DECODER_ARM32_GENERATED_H\n")
        f.write("#define POUND_JIT_DECODER_ARM32_GENERATED_H\n\n")
        f.write('#include "arm32.h"\n')
        # decode_bucket_t uses size_t, which <stddef.h> defines.
        f.write("#include <stddef.h>\n\n")
        f.write(f"#define LOOKUP_TABLE_MAX_BUCKET_SIZE {MAX_BUCKET_SIZE}U\n\n")
        f.write("typedef struct {\n")
        f.write(
            " const pvm_jit_decoder_arm32_instruction_info_t *instructions[LOOKUP_TABLE_MAX_BUCKET_SIZE];\n"
        )
        f.write(" size_t count;\n")
        f.write("} decode_bucket_t;\n\n")
        f.write(
            f"extern const decode_bucket_t g_decoder_lookup_table[{TABLE_SIZE}];\n\n"
        )
        f.write("#endif\n")


def write_opcodes_header(path, instructions):
    """Generates the arm32_opcodes.h file with a unique enum for each mnemonic.

    Instructions sharing a name produce a single enumerator, emitted at the
    position of the first occurrence. ``PVM_A32_OP_STOP`` is appended last.

    Args:
        path: Output path of the generated header.
        instructions: Iterable of ``Instruction`` objects.
    """
    print(f"Generating opcode header file: {path}")
    with open(path, "w", encoding="utf-8") as f:
        f.write("/* GENERATED FILE - DO NOT EDIT */\n")
        f.write("/* This file is generated by scripts/generate_jit_assets.py */\n")
        f.write("#ifndef POUND_JIT_DECODER_ARM32_OPCODES_H\n")
        f.write("#define POUND_JIT_DECODER_ARM32_OPCODES_H\n\n")
        f.write("typedef enum {\n")
        for enum_name, _ in _unique_opcodes(instructions):
            f.write(f" {enum_name},\n")
        f.write(" PVM_A32_OP_STOP,\n")
        f.write("} pvm_jit_decoder_arm32_opcode_t;\n\n")
        f.write("#endif // POUND_JIT_DECODER_ARM32_OPCODES_H\n")


def write_decoder_table_c_file(path, instructions, buckets):
    """Write the C source defining ``g_instructions`` and the lookup table.

    Only non-empty buckets are emitted, using designated initializers.

    Args:
        path: Output path of the generated C file.
        instructions: Sequence of ``Instruction`` objects, in array order.
        buckets: Mapping from hash index to the list of instructions in
            that bucket, as returned by ``generate_lookup_table``.
    """
    print(f"Generating decoder table source file: {path}")
    with open(path, "w", encoding="utf-8") as f:
        f.write("/* GENERATED FILE - DO NOT EDIT */\n")
        f.write("/* This file is generated by scripts/generate_jit_assets.py */\n")
        f.write('#include "arm32.h"\n')
        f.write('#include "arm32_table.h"\n\n')
        f.write(
            f"static const pvm_jit_decoder_arm32_instruction_info_t g_instructions[{len(instructions)}] = {{\n"
        )
        for inst in instructions:
            f.write(
                f' {{ "{inst.mnemonic}", "{inst.bitstring}", PVM_A32_OP_{inst.name.upper()}, {inst.mask:#010x}U, {inst.expected:#010x}U }},\n'
            )
        f.write("};\n")

        f.write(f"const decode_bucket_t g_decoder_lookup_table[{TABLE_SIZE}] = {{\n")
        for i in range(TABLE_SIZE):
            bucket = buckets[i]
            if not bucket:
                continue
            entries = "".join(
                f"&g_instructions[{inst.array_index}], " for inst in bucket
            )
            f.write(f" [{i:#05x}] = {{ .instructions = {{ ")
            f.write(entries)
            f.write(f"}}, .count = {len(bucket)}U }},\n")
        f.write("};\n")


def write_interpreter_handler_table(path, instructions):
    """Generates the dispatch table.

    One ``[OPCODE] = &&OPCODE,`` entry is written per unique opcode name,
    followed by the entry for ``PVM_A32_OP_STOP``.

    Args:
        path: Output path of the generated include file.
        instructions: Iterable of ``Instruction`` objects.
    """
    print(f"Generating interpreter handler table: {path}")
    with open(path, "w", encoding="utf-8") as f:
        for enum_name, _ in _unique_opcodes(instructions):
            f.write(f" [{enum_name}] = &&{enum_name},\n")
        f.write(" [PVM_A32_OP_STOP] = &&PVM_A32_OP_STOP,\n")


def write_interpreter_handler_skeletons(path, instructions):
    """Generates a skeleton file for handlers.

    One empty ``HANDLER(...)`` block is written per unique opcode name; its
    TODO comment names the mnemonic of the first instruction with that
    name. A block for ``PVM_A32_OP_STOP`` is appended last.

    Args:
        path: Output path of the generated include file.
        instructions: Iterable of ``Instruction`` objects.
    """
    print(f"Generating new skeleton file: {path}")
    with open(path, "w", encoding="utf-8") as f:
        f.write("/*\n")
        f.write(" * GENERATED FILE - DO NOT EDIT\n")
        f.write(" * This file is generated by scripts/generate_jit_assets.py \n")
        f.write(
            " * This file contains pre-generated, empty handler blocks for the every instruction.\n"
        )
        f.write(" */\n\n")
        for enum_name, inst in _unique_opcodes(instructions):
            f.write(f"HANDLER({enum_name}): {{\n")
            f.write(f" // TODO: Implement handler for {inst.mnemonic}\n")
            f.write(" DISPATCH();\n")
            f.write("}\n\n")
        f.write("HANDLER(PVM_A32_OP_STOP): {\n")
        f.write(" // TODO: Implement handler for PVM_A32_OP_STOP\n")
        f.write(" DISPATCH();\n")
        f.write("}\n\n")


def main():
    """Parse command-line arguments and generate every output file."""
    parser = argparse.ArgumentParser(description="Generate ARM32 Decoder Tables")
    parser.add_argument("input")
    # Every output path is handed straight to open(); require them all so
    # a missing option is reported before any file has been written.
    parser.add_argument("--out-opcodes-h", required=True)
    parser.add_argument("--out-decoder-c", required=True)
    parser.add_argument("--out-decoder-h", required=True)
    parser.add_argument("--out-handler-table-inc", required=True)
    parser.add_argument("--out-handler-skeletons-inc", required=True)
    args = parser.parse_args()

    instructions = parse_inc_file(args.input)
    buckets = generate_lookup_table(instructions)

    # Generate all necessary files
    write_opcodes_header(args.out_opcodes_h, instructions)
    write_decoder_table_c_file(args.out_decoder_c, instructions, buckets)
    write_decoder_table_h_file(args.out_decoder_h)
    write_interpreter_handler_table(args.out_handler_table_inc, instructions)
    write_interpreter_handler_skeletons(args.out_handler_skeletons_inc, instructions)


if __name__ == "__main__":
    main()