mirror of
https://github.com/pound-emu/pound.git
synced 2025-12-11 07:36:57 +00:00
Introduces the first unit tests for the ARM32 JIT decoder. A new script automatically generates a test case for every instruction in arm32.inc, providing 100% of the isa. This also includes a critical rework of the decoder's lookup table generation logic. The previous hashing method was flawed, causing build-time overflows and incorrect instruction matching (shadowing) for patterns with wildcards. The new algorithm correctly populates the lookup table. Signed-off-by: Ronald Caesar <github43132@proton.me>
147 lines
5.4 KiB
Python
147 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
|
import re
|
|
import sys
|
|
import argparse
|
|
|
|
# Increased bucket size to handle overlapping wildcards
|
|
MAX_BUCKET_SIZE = 64
|
|
TABLE_SIZE = 4096
|
|
|
|
# Bits [27:20] and [7:4]
|
|
HASH_BITS_MASK = 0x0FF000F0
|
|
|
|
class Instruction:
|
|
def __init__(self, name, mnemonic, bitstring, array_index):
|
|
self.name = name
|
|
self.mnemonic = mnemonic
|
|
self.bitstring = bitstring
|
|
self.array_index = array_index
|
|
self.mask = 0
|
|
self.expected = 0
|
|
self.parse_bits()
|
|
|
|
def parse_bits(self):
|
|
if len(self.bitstring) != 32:
|
|
print(f"Error: Bitstring length {len(self.bitstring)} invalid for {self.name}")
|
|
sys.exit(1)
|
|
|
|
for i, char in enumerate(self.bitstring):
|
|
bit_pos = 31 - i
|
|
if char == '0':
|
|
self.mask |= (1 << bit_pos)
|
|
elif char == '1':
|
|
self.mask |= (1 << bit_pos)
|
|
self.expected |= (1 << bit_pos)
|
|
# Variable bits (c, n, d, m, etc) leave mask as 0
|
|
|
|
def parse_inc_file(input_path):
|
|
instructions = []
|
|
regex = re.compile(r'INST\(\s*([A-Za-z0-9_]+),\s*"(.*?)",\s*"(.*?)"\s*\)')
|
|
|
|
try:
|
|
with open(input_path, 'r') as f:
|
|
lines = f.readlines()
|
|
except FileNotFoundError:
|
|
print(f"Error: Could not find input file: {input_path}")
|
|
sys.exit(1)
|
|
|
|
index_counter = 0
|
|
for line in lines:
|
|
line = line.strip()
|
|
if not line or line.startswith("//"):
|
|
continue
|
|
|
|
match = regex.search(line)
|
|
if match:
|
|
inst = Instruction(match.group(1), match.group(2), match.group(3), index_counter)
|
|
instructions.append(inst)
|
|
index_counter += 1
|
|
return instructions
|
|
|
|
def generate_lookup_table(instructions):
|
|
buckets = {i: [] for i in range(TABLE_SIZE)}
|
|
|
|
# Iterate over every possible hash index to determine which instructions belong in it
|
|
for i in range(TABLE_SIZE):
|
|
# Reconstruct the 32-bit value that would generate this hash index
|
|
# Hash algorithm: (Major << 4) | Minor
|
|
# Major is bits [27:20], Minor is bits [7:4]
|
|
|
|
major_val = (i >> 4) & 0xFF
|
|
minor_val = i & 0x0F
|
|
|
|
# Create a "Probe" value with the hash bits set
|
|
probe_val = (major_val << 20) | (minor_val << 4)
|
|
|
|
for inst in instructions:
|
|
# Check if this instruction matches this hash index.
|
|
# An instruction matches if its FIXED bits (mask) match the Probe bits
|
|
# for the specific positions used by the hash.
|
|
|
|
relevant_mask = inst.mask & HASH_BITS_MASK
|
|
relevant_expected = inst.expected & HASH_BITS_MASK
|
|
|
|
if (probe_val & relevant_mask) == relevant_expected:
|
|
buckets[i].append(inst)
|
|
|
|
if len(buckets[i]) > MAX_BUCKET_SIZE:
|
|
print(f"FATAL ERROR: Bucket {i:#05x} overflowed! Size: {len(buckets[i])}")
|
|
print("This means too many instructions map to the same hash index.")
|
|
sys.exit(1)
|
|
|
|
return buckets
|
|
|
|
def write_c_file(path, instructions, buckets):
|
|
with open(path, 'w') as f:
|
|
f.write("/* GENERATED FILE - DO NOT EDIT */\n")
|
|
f.write("/* This file is generated by scripts/generate_jit_decoder_a32_table.py */\n")
|
|
f.write('#include "arm32.h"\n')
|
|
f.write('#include "arm32_table_generated.h"\n\n')
|
|
|
|
f.write(f"static const pvm_jit_decoder_arm32_instruction_info_t g_instructions[{len(instructions)}] = {{\n")
|
|
for inst in instructions:
|
|
f.write(f' {{ "{inst.mnemonic}", "{inst.bitstring}", {inst.mask:#010x}U, {inst.expected:#010x}U }},\n')
|
|
f.write("};\n\n")
|
|
|
|
f.write(f"const decode_bucket_t g_decoder_lookup_table[{TABLE_SIZE}] = {{\n")
|
|
for i in range(TABLE_SIZE):
|
|
if len(buckets[i]) > 0:
|
|
f.write(f" [{i:#05x}] = {{ .instructions = {{ ")
|
|
for inst in buckets[i]:
|
|
f.write(f"&g_instructions[{inst.array_index}], ")
|
|
f.write(f"}}, .count = {len(buckets[i])}U }},\n")
|
|
f.write("};\n")
|
|
|
|
def write_h_file(path):
|
|
with open(path, 'w') as f:
|
|
f.write("#ifndef POUND_JIT_DECODER_ARM32_GENERATED_H\n")
|
|
f.write("#define POUND_JIT_DECODER_ARM32_GENERATED_H\n\n")
|
|
f.write('#include "arm32.h"\n')
|
|
f.write('#include <stddef.h>\n\n')
|
|
f.write(f"#define LOOKUP_TABLE_MAX_BUCKET_SIZE {MAX_BUCKET_SIZE}U\n\n")
|
|
f.write("typedef struct {\n")
|
|
f.write(" const pvm_jit_decoder_arm32_instruction_info_t *instructions[LOOKUP_TABLE_MAX_BUCKET_SIZE];\n")
|
|
f.write(" size_t count;\n")
|
|
f.write("} decode_bucket_t;\n\n")
|
|
f.write(f"extern const decode_bucket_t g_decoder_lookup_table[{TABLE_SIZE}];\n\n")
|
|
f.write("#endif\n")
|
|
|
|
# ---------------------------------------------------------
|
|
# Main Execution
|
|
# ---------------------------------------------------------
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Generate ARM32 Decoder Tables")
|
|
parser.add_argument("input", help="Path to arm32.inc")
|
|
parser.add_argument("out_c", help="Path to output .c file")
|
|
parser.add_argument("out_h", help="Path to output .h file")
|
|
args = parser.parse_args()
|
|
|
|
print(f"{args.input} -> {args.out_c}")
|
|
instructions = parse_inc_file(args.input)
|
|
buckets = generate_lookup_table(instructions)
|
|
write_c_file(args.out_c, instructions, buckets)
|
|
write_h_file(args.out_h)
|
|
|
|
if __name__ == "__main__":
|
|
main()
|