jit/decoder: Add generated arm32 tests

Introduces the first unit tests for the ARM32 JIT decoder. A new script
automatically generates a test case for every instruction in arm32.inc,
providing 100% coverage of the ISA.

This also includes a critical rework of the decoder's lookup table
generation logic. The previous hashing method was flawed, causing
build-time overflows and incorrect instruction matching (shadowing) for
patterns with wildcards. The new algorithm correctly populates the
lookup table.

Signed-off-by: Ronald Caesar <github43132@proton.me>
This commit is contained in:
Ronald Caesar 2025-11-30 04:47:52 -04:00
parent c235e57071
commit d1e3919a8c
No known key found for this signature in database
GPG key ID: 04307C401999C596
13 changed files with 37513 additions and 502 deletions

View file

@ -1,16 +1,15 @@
#!/usr/bin/env python3
import re
import sys
import datetime
import argparse
# ---------------------------------------------------------
# Configuration & Logic
# ---------------------------------------------------------
# Upper bound on the number of instructions a single lookup-table bucket may
# hold. Sized generously because a pattern with wildcards in the hash bits is
# listed in every bucket it can match.
MAX_BUCKET_SIZE = 64
# Number of lookup-table buckets: one per possible 12-bit hash index.
TABLE_SIZE = 4096
# Instruction bits that feed the hash: [27:20] (8 bits) and [7:4] (4 bits).
HASH_BITS_MASK = 0x0FF000F0
class Instruction:
def __init__(self, name, mnemonic, bitstring, array_index):
self.name = name
@ -33,11 +32,7 @@ class Instruction:
elif char == '1':
self.mask |= (1 << bit_pos)
self.expected |= (1 << bit_pos)
def get_hash(self):
major = (self.expected >> 20) & 0xFF
minor = (self.expected >> 4) & 0x0F
return (major << 4) | minor
# Variable bits (c, n, d, m, etc) leave mask as 0
def parse_inc_file(input_path):
instructions = []
@ -65,19 +60,43 @@ def parse_inc_file(input_path):
def generate_lookup_table(instructions):
    """Build the hash-indexed lookup table used by the decoder.

    Each of the TABLE_SIZE hash indices is expanded back into the instruction
    bits it stands for (bits [27:20] and [7:4]). An instruction is placed in
    every bucket whose index agrees with the instruction's fixed bits inside
    HASH_BITS_MASK. Hash bits that the pattern leaves variable match either
    value, so such an instruction is listed in several buckets.

    Args:
        instructions: Iterable of parsed instruction objects exposing integer
            ``mask`` (positions of fixed bits) and ``expected`` (values of
            those fixed bits) attributes.

    Returns:
        A dict mapping every index in ``range(TABLE_SIZE)`` to the list of
        instructions that can hash to it, in input order.

    Exits the process with status 1 as soon as any bucket grows beyond
    MAX_BUCKET_SIZE entries.
    """
    # The hash-relevant fixed bits depend only on the instruction, so compute
    # them once here instead of once per table index. Materialising the list
    # also makes the function safe for one-shot iterables.
    hash_patterns = [
        (inst, inst.mask & HASH_BITS_MASK, inst.expected & HASH_BITS_MASK)
        for inst in instructions
    ]

    buckets = {index: [] for index in range(TABLE_SIZE)}

    for index, bucket in buckets.items():
        # Rebuild the instruction bits that produce this hash index:
        # index == (bits[27:20] << 4) | bits[7:4].
        major_val = (index >> 4) & 0xFF
        minor_val = index & 0x0F
        probe_val = (major_val << 20) | (minor_val << 4)

        for inst, fixed_mask, fixed_value in hash_patterns:
            # Only the fixed bits inside the hash positions must agree with
            # the probe; wildcard positions are masked out and always match.
            if (probe_val & fixed_mask) != fixed_value:
                continue

            bucket.append(inst)
            if len(bucket) > MAX_BUCKET_SIZE:
                print(f"FATAL ERROR: Bucket {index:#05x} overflowed! Size: {len(bucket)}")
                print("This means too many instructions map to the same hash index.")
                sys.exit(1)

    return buckets
def write_c_file(path, instructions, buckets):
with open(path, 'w') as f:
f.write("/* GENERATED FILE - DO NOT EDIT */\n")
f.write("/* This file is generated by scripts/generate_jit_decoder_a32_table.py */\n")
f.write('#include "arm32.h"\n')
f.write('#include "arm32_table_generated.h"\n')
f.write('#include "arm32_table_generated.h"\n\n')
f.write(f"static const pvm_jit_decoder_arm32_instruction_info_t g_instructions[{len(instructions)}] = {{\n")
for inst in instructions:
@ -118,7 +137,7 @@ def main():
parser.add_argument("out_h", help="Path to output .h file")
args = parser.parse_args()
print(f"--- Generating Decoder: {args.input} -> {args.out_c} ---")
print(f"{args.input} -> {args.out_c}")
instructions = parse_inc_file(args.input)
buckets = generate_lookup_table(instructions)
write_c_file(args.out_c, instructions, buckets)