jit/decoder: Add generated arm32 tests

Introduces the first unit tests for the ARM32 JIT decoder. A new script automatically generates a test case for every instruction in arm32.inc, providing 100% coverage of the ISA entries defined there. This also includes a critical rework of the decoder's lookup table generation logic. The previous hashing method was flawed, causing build-time overflows and incorrect instruction matching (shadowing) for patterns with wildcards. The new algorithm correctly populates the lookup table. Signed-off-by: Ronald Caesar <github43132@proton.me>
2025-12-11 07:36:57 +00:00 · 2025-11-30 04:47:52 -04:00 · 2025-11-30 04:47:52 -04:00 · d1e3919a8c
commit d1e3919a8c
parent c235e57071
13 changed files with 37513 additions and 502 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -114,15 +114,16 @@ jobs:

      - name: Install Dependencies
        run: |
-          brew install llvm
+          brew install llvm@20
+          echo "CMAKE_PREFIX_PATH=/usr/local/opt/llvm@20" >> $GITHUB_ENV

      - name: Configure CMake (x86_64)
        run: >
          cmake -G Ninja -B "${{env.BUILD_DIR}}"
          -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
          -DCMAKE_OSX_ARCHITECTURES=x86_64
-          -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang
-          -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++
+          -DCMAKE_C_COMPILER=/usr/local/opt/llvm@20/bin/clang
+          -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm@20/bin/clang++
          -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15

      - name: Build (x86_64)
@ -156,14 +157,15 @@ jobs:

      - name: Install Dependencies
        run: |
-          brew install llvm
+          brew install llvm@20
+          echo "CMAKE_PREFIX_PATH=/opt/homebrew/opt/llvm@20" >> $GITHUB_ENV

      - name: Configure CMake (ARM64)
        run: >
          cmake -G Ninja -B "${{env.BUILD_DIR}}"
          -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
-          -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang
-          -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++
+          -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm@20/bin/clang
+          -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm@20/bin/clang++
          -DCMAKE_OSX_ARCHITECTURES=arm64
          -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -13,6 +13,8 @@ endif()
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED TRUE)
 set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

 #-------------------------------
@ -91,17 +93,29 @@ message(STATUS "All submodules verified successfully")
 #-----------------------------
 # ---- Target Definitions ----
 #-----------------------------
-
 add_executable(Pound
    src/main.c
 )

-set(TEST_SRC
-    ${CMAKE_CURRENT_SOURCE_DIR}/tests/jit/ir/test_value.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/tests/jit/decoder/test_arm32.cpp
+find_package(Python3 REQUIRED)
+
+# Generated decoder test source and the command that produces it
+set(GEN_TEST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/tests/jit/decoder/test_arm32_generated.cpp)
+
+add_custom_command(
+    OUTPUT ${GEN_TEST_SRC}
+    COMMAND Python3::Interpreter ${CMAKE_SOURCE_DIR}/scripts/generate_decoder_tests.py
+            ${CMAKE_SOURCE_DIR}/src/jit/frontend/decoder/arm32.inc
+            ${GEN_TEST_SRC}
+    DEPENDS ${CMAKE_SOURCE_DIR}/scripts/generate_decoder_tests.py
+            ${CMAKE_SOURCE_DIR}/src/jit/frontend/decoder/arm32.inc
+            COMMENT "Generating ARM32 Decoder Tests"
 )

-#add_executable(tests ${TEST_SRC})
+# Add to test executable
+add_executable(tests
+    ${GEN_TEST_SRC}
+)

 add_subdirectory(3rd_Party)
 add_subdirectory(src/common)
@ -161,8 +175,8 @@ target_link_libraries(Pound PRIVATE
 )


-#target_link_libraries(tests PRIVATE
-#    jit
-#    gtest
-#    gtest_main
-#)
+target_link_libraries(tests PRIVATE
+    jit
+    gtest
+    gtest_main
+)
--- a/scripts/generate_decoder_tests.py
+++ b/scripts/generate_decoder_tests.py
@ -0,0 +1,454 @@
+"""
+Generates GoogleTest cases for the ARM32 decoder from `arm32.inc`, using the
+techniques below to ensure the quality and correctness of the generated tests.
+
+
+# Instruction Parsing
+
+The script begins by parsing the `arm32.inc` file. It uses a regular expression
+to find all occurrences of the `INST()` macro and extracts three key pieces of
+information for each instruction:
+
+1.  **Mnemonic**: A short, unique identifier (e.g., `ADD_imm`).
+2.  **Name**: A human-readable description (e.g., `"ADD (imm)"`).
+3.  **Bitstring**: A 32-character string representing the instruction's binary
+                   encoding.
+
+The bitstring is the most critical piece. It's a mix of `'0'`, `'1'`, and
+wildcard characters (like `v`, `n`, `c`) that represent variable fields.
+
+
+# Randomized Instantiation & Constraint System
+
+To create a concrete test case from an abstract bitstring, the script must
+generate a valid 32-bit integer. It does this by:
+
+1.  Setting the fixed `'0'` and `'1'` bits.
+2.  Randomly generating `'0'` or `'1'` for all wildcard bits.
+
+However, simple randomization can be problematic. Certain ARM instructions
+are specializations of more general patterns. For example, the `SXTB`
+instruction is a special case of the `SXTAB` instruction where the `Rn`
+register field is `1111`.
+
+If we randomly generate an `SXTAB` test case where `Rn` happens to be `1111`,
+the decoder might (correctly) identify it as `SXTB`. This would cause the
+`SXTAB` test to fail.
+
+To prevent this, the script uses a **constraint system**
+(`get_instruction_constraints`). This function defines rules to avoid
+generating ambiguous encodings. When generating a test for a general
+instruction (`SXTAB`), it forces the specialized bits (`Rn`) to be a value other
+than the one that would cause it to alias to the more specific instruction
+(`SXTB`). This ensures each test validates exactly one unique instruction
+definition.
+
+
+# Oracle-Based Negative Testing
+
+The most powerful feature of this script is its **negative verification**
+strategy. For every fixed `'0'` or `'1'` bit in an instruction's bitstring,
+the script generates a test case where that single bit is flipped.
+This creates an instruction that is intentionally invalid *for that specific
+pattern*.
+
+The script then uses an "oracle" — a Python-based reference decoder
+(`python_decode`) — to predict what this corrupted instruction *should*
+decode to. The corrupted value might match a different valid instruction, or
+it might be completely invalid (decode to `NULL`).
+
+The generated C++ test then asserts that the actual C++ decoder's output
+exactly matches the oracle's prediction. This guarantees that the decoder
+rejects invalid patterns that are only one bit off from a valid one.
+
+
+# Fuzz Testing
+
+Finally, the script generates a fuzz test that feeds a large number (100,000)
+of completely random 32-bit integers to the decoder. This test serves as a
+stability and integrity check. If the decoder identifies any of these random
+inputs as a valid instruction, it cross-verifies that the input truly matches
+the mask and expected value for that instruction. This ensures the decoder
+never produces "false positives" and is robust against arbitrary data.
+"""
+
+#!/usr/bin/env python3
+import re
+import sys
+import argparse
+import random
+from typing import List, Dict, Optional, Tuple
+
+CPP_HEADER = """/*
+ * GENERATED FILE - DO NOT EDIT
+ *
+ * This file is generated by scripts/generate_decoder_tests.py
+ *
+ * PURPOSE:
+ * Provides 100% requirements-based test coverage for the ARM32 Instruction Decoder.
+ */
+
+#include "jit/frontend/decoder/arm32.h"
+#include <gtest/gtest.h>
+#include <random>
+
+class Arm32DecoderGeneratedTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+    }
+};
+"""
+
+
+class Instruction:
+    """A container for a parsed instruction definition."""
+
+    def __init__(
+        self,
+        mnemonic: str,
+        name: str,
+        bitstring: str,
+        val: int,
+        mask: int,
+        expected: int,
+    ):
+        self.mnemonic: str = mnemonic
+        self.name: str = name
+        self.bitstring: str = bitstring
+        self.val: int = val  # A randomly generated valid encoding
+        self.mask: int = mask
+        self.expected: int = expected
+
+
+def get_instruction_constraints(name: str) -> Dict[int, int]:
+    """
+    Returns a dictionary of {bit_index: value} to force specific instructions
+    to avoid generating encodings that belong to other, more specific instructions.
+    This prevents instruction aliasing during randomized test case generation.
+
+    Args:
+        name: The human-readable name of the instruction (e.g., "SXTAB").
+
+    Returns:
+        A dictionary mapping bit positions (31-0) to a required value (0 or 1).
+    """
+    constraints: Dict[int, int] = {}
+
+    # -------------------------------------------------------------------------
+    # Load/Store & Coprocessor Collisions
+    # -------------------------------------------------------------------------
+    # To ensure LDC doesn't look like MRRC, we force P=1 (Bit 24). MRRC requires bit 24=0.
+    if name in ["LDC", "LDC2", "STC", "STC2"]:
+        constraints[24] = 1
+
+    # Unprivileged Loads (LDRT/STRT) become aliases if P=0, W=1.
+    # Force P=1 for standard loads to avoid these aliases.
+    if name in [
+        "LDR (reg)",
+        "LDRB (reg)",
+        "LDRH (reg)",
+        "LDRSB (reg)",
+        "LDRSH (reg)",
+        "STR (reg)",
+        "STRB (reg)",
+        "STRH (reg)",
+        "LDRD (reg)",
+        "STRD (reg)",
+    ]:
+        constraints[24] = 1
+    if name in ["LDR (imm)", "LDRB (imm)", "STR (imm)", "STRB (imm)"]:
+        constraints[24] = 1
+
+    # -------------------------------------------------------------------------
+    # Extend Instructions (e.g., SXTB/SXTAB)
+    # -------------------------------------------------------------------------
+    # SXTB is SXTAB with Rn=1111 (bits 19-16).
+    # When testing the more generic SXTAB, ensure Rn is not 1111. We force 0.
+    if name in ["SXTAB", "SXTAB16", "SXTAH", "UXTAB", "UXTAB16", "UXTAH"]:
+        constraints[19] = 0
+        constraints[18] = 0
+        constraints[17] = 0
+        constraints[16] = 0
+
+    # -------------------------------------------------------------------------
+    # Multiply Instructions
+    # -------------------------------------------------------------------------
+    # SMMUL is SMMLA with Ra=1111. Force Ra!=1111 for SMMLA tests.
+    if name in ["SMMLA", "SMMLS"]:
+        constraints[15] = 0
+        constraints[14] = 0
+        constraints[13] = 0
+        constraints[12] = 0
+
+    # SMUAD is SMLAD with Ra=1111. Force Ra!=1111 for SMLAD-family tests.
+    if name in ["SMLAD", "SMLSD", "SMLALD", "SMLSLD"]:
+        constraints[15] = 0
+        constraints[14] = 0
+        constraints[13] = 0
+        constraints[12] = 0
+
+    return constraints
+
+
+def calculate_mask_and_expected(bitstring: str) -> Tuple[int, int]:
+    """
+    Calculates the mask and expected value from a bitstring pattern.
+    - '1' sets the bit in both mask and expected.
+    - '0' sets the bit in the mask only.
+    - Wildcards leave the bit as 0 in both.
+
+    Args:
+        bitstring: The 32-character instruction pattern.
+
+    Returns:
+        A tuple containing the (mask, expected) integer values.
+    """
+    mask: int = 0
+    expected: int = 0
+    for i, char in enumerate(bitstring):
+        bit_pos = 31 - i
+        if char == "0":
+            mask |= 1 << bit_pos
+        elif char == "1":
+            mask |= 1 << bit_pos
+            expected |= 1 << bit_pos
+    return mask, expected
+
+
+def parse_bitstring_randomized(name: str, bitstring: str) -> int:
+    """
+    Generates a concrete, valid instruction word from a bitstring pattern.
+    Applies constraints to avoid generating ambiguous instruction aliases.
+
+    Args:
+        name: The human-readable name of the instruction.
+        bitstring: The 32-character instruction pattern.
+
+    Returns:
+        A 32-bit integer representing a valid encoding of the instruction.
+    """
+    val: int = 0
+    if len(bitstring) != 32:
+        raise ValueError(f"Invalid bitstring length: {len(bitstring)}")
+
+    constraints = get_instruction_constraints(name)
+
+    for i, char in enumerate(bitstring):
+        bit_pos = 31 - i
+        # Apply constraints first if they exist for this bit
+        if bit_pos in constraints:
+            if constraints[bit_pos] == 1:
+                val |= 1 << bit_pos
+            continue
+
+        # Set fixed bits or randomize wildcard bits
+        if char == "1":
+            val |= 1 << bit_pos
+        elif char not in ("0", "1"):
+            if random.choice([True, False]):
+                val |= 1 << bit_pos
+    return val
+
+
+def parse_inc_file(input_path: str) -> List[Instruction]:
+    """
+    Parses an arm32.inc file and returns a list of Instruction objects.
+
+    Args:
+        input_path: The path to the arm32.inc file.
+
+    Returns:
+        A list of Instruction objects, one for each INST macro found.
+    """
+    instructions: List[Instruction] = []
+    regex = re.compile(r'INST\(\s*([A-Za-z0-9_]+),\s*"(.*?)",\s*"(.*?)"\s*\)')
+
+    try:
+        with open(input_path, "r") as f:
+            lines = f.readlines()
+    except FileNotFoundError:
+        print(f"Error: Could not find input file: {input_path}")
+        sys.exit(1)
+
+    for line in lines:
+        line = line.strip()
+        if not line or line.startswith("//"):
+            continue
+
+        match = regex.search(line)
+        if match:
+            mnemonic = match.group(1)
+            name = match.group(2)
+            bitstring = match.group(3)
+
+            val = parse_bitstring_randomized(name, bitstring)
+            mask, expected = calculate_mask_and_expected(bitstring)
+
+            # Manual patch for MSR (imm): the constraint is "field != 0", which the per-bit table
+            # cannot express. In cccc00110010mmmm1111rrrrvvvvvvvv, an all-zero `mmmm` (bits 19-16)
+            # can alias the hint encodings (NOP, YIELD, ...); bit 16 is the low bit of `mmmm`.
+            if name == "MSR (imm)":
+                val |= 1 << 16  # Set bit 16 of the immediate field
+
+            instructions.append(
+                Instruction(mnemonic, name, bitstring, val, mask, expected)
+            )
+    return instructions
+
+
+def python_decode(val: int, instructions: List[Instruction]) -> Optional[str]:
+    """
+    Acts as a reference decoder (oracle). Returns the name of the first instruction that matches 'val'.
+    This simulates the linear scan of the C decoder to predict the correct result.
+
+    Args:
+        val: The 32-bit instruction word to decode.
+        instructions: The list of instruction definitions, in order of precedence.
+
+    Returns:
+        The name of the matching instruction, or None if no match is found.
+    """
+    for inst in instructions:
+        if (val & inst.mask) == inst.expected:
+            return inst.name
+    return None
+
+
+def generate_cpp_tests(instructions: List[Instruction], output_path: str) -> None:
+    """
+    Generates the C++ test file content and writes it to the output path.
+
+    Args:
+        instructions: A list of all instruction definitions.
+        output_path: The path to write the generated .cpp file.
+    """
+    with open(output_path, "w") as f:
+        f.write(CPP_HEADER)
+
+        mnemonic_counts: Dict[str, int] = {}
+
+        for inst in instructions:
+            base_mnemonic = inst.mnemonic
+            val = inst.val
+            name = inst.name
+            bitstring = inst.bitstring
+
+            # Generate a unique test name, handling multiple definitions for one mnemonic
+            if base_mnemonic not in mnemonic_counts:
+                mnemonic_counts[base_mnemonic] = 1
+                test_name = f"Verify_{base_mnemonic}"
+            else:
+                mnemonic_counts[base_mnemonic] += 1
+                count = mnemonic_counts[base_mnemonic]
+                test_name = f"Verify_{base_mnemonic}_{count}"
+
+            f.write(f"TEST_F(Arm32DecoderGeneratedTest, {test_name}) {{\n")
+
+            # --- 1. Positive Verification ---
+            f.write(
+                f'    // 1. Positive Verification: Ensures "{name}" is correctly identified.\n'
+            )
+            f.write(f"    const uint32_t valid_inst = {val:#010x};\n")
+            f.write(
+                f"    const pvm_jit_decoder_arm32_instruction_info_t* info = pvm_jit_decoder_arm32_decode(valid_inst);\n\n"
+            )
+            f.write(
+                f'    ASSERT_NE(info, nullptr) << "Failed to decode known valid pattern for {name}: {val:#x}";\n'
+            )
+            f.write(
+                f'    EXPECT_STREQ(info->name, "{name}") << "Decoded as the wrong instruction variant.";\n'
+            )
+            f.write(
+                f'    EXPECT_EQ((valid_inst & info->mask), info->expected) << "Mask/Expected mismatch on positive test.";\n\n'
+            )
+
+            # --- 2. Negative Verification (Oracle Based) ---
+            f.write(
+                f"    // 2. Negative Verification: Flip each fixed bit to ensure correct alternative decoding or rejection.\n"
+            )
+
+            for i, char in enumerate(bitstring):
+                bit_pos = 31 - i
+
+                # We only test the fixed bits, as they define the instruction pattern.
+                if char in ("0", "1"):
+                    mask = 1 << bit_pos
+                    corrupt_inst = val ^ mask
+
+                    # ORACLE: Determine what this corrupted instruction SHOULD decode to.
+                    expected_decoded_name = python_decode(corrupt_inst, instructions)
+
+                    f.write(f"    {{\n")
+                    f.write(f"        // Test case: Flipping fixed bit {bit_pos}\n")
+                    f.write(
+                        f"        const uint32_t corrupt_inst = {corrupt_inst:#010x};\n"
+                    )
+                    f.write(
+                        f"        const pvm_jit_decoder_arm32_instruction_info_t* neg_info = pvm_jit_decoder_arm32_decode(corrupt_inst);\n\n"
+                    )
+
+                    if expected_decoded_name is None:
+                        # Should decode to NOTHING
+                        f.write(f"        // Oracle predicts no match.\n")
+                        f.write(
+                            f'        EXPECT_EQ(neg_info, nullptr) << "Safety Violation: Should have decoded to nullptr, but got " << (neg_info ? neg_info->name : "nullptr");\n'
+                        )
+                    else:
+                        # Should decode to the OTHER valid instruction
+                        f.write(
+                            f"        // Oracle predicts this should decode as: {expected_decoded_name}\n"
+                        )
+                        f.write(
+                            f"        ASSERT_NE(neg_info, nullptr) << \"Safety Violation: Python Oracle predicted '{expected_decoded_name}' but C++ decoder returned null\";\n"
+                        )
+                        f.write(
+                            f'        EXPECT_STREQ(neg_info->name, "{expected_decoded_name}") << "Safety Violation: Incorrect decode on single-bit corruption.";\n'
+                        )
+
+                    f.write(f"    }}\n")
+
+            f.write(f"}}\n\n")
+
+        # --- 3. Generate Fuzz Test for overall stability ---
+        f.write("TEST_F(Arm32DecoderGeneratedTest, Stability_Fuzz_Test) {\n")
+        f.write(
+            "    // Feeds a large number of random inputs to the decoder to check for crashes or false positives.\n"
+        )
+        f.write("    std::mt19937 rng(42); // Fixed seed for deterministic runs\n")
+        f.write("    std::uniform_int_distribution<uint32_t> dist;\n\n")
+        f.write("    for(int i = 0; i < 100000; ++i) {\n")
+        f.write("        uint32_t random_inst = dist(rng);\n")
+        f.write(
+            "        const pvm_jit_decoder_arm32_instruction_info_t* info = pvm_jit_decoder_arm32_decode(random_inst);\n"
+        )
+        f.write("        if (info) {\n")
+        f.write(
+            "            // If the decoder claims a match, it MUST be a valid match.\n"
+        )
+        f.write("            ASSERT_EQ((random_inst & info->mask), info->expected) \n")
+        f.write(
+            '                << "Integrity Violation: Decoded " << std::hex << random_inst << " as \\"" << info->name << "\\" but mask/expected failed.";\n'
+        )
+        f.write("        }\n")
+        f.write("    }\n")
+        f.write("}\n")
+
+
+def main() -> None:
+    """Main entry point for the script."""
+    parser = argparse.ArgumentParser(description="Generate ARM32 Decoder Tests")
+    parser.add_argument("input", help="Path to arm32.inc")
+    parser.add_argument("output", help="Path to output test_arm32_generated.cpp")
+    args = parser.parse_args()
+
+    print(f"{args.input} -> {args.output}")
+
+    # Use a fixed seed for deterministic test generation. This is crucial for reproducibility.
+    random.seed(12345)
+
+    instructions = parse_inc_file(args.input)
+    generate_cpp_tests(instructions, args.output)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/generate_jit_decoder_a32_table.py
+++ b/scripts/generate_jit_decoder_a32_table.py
@ -1,16 +1,15 @@
 #!/usr/bin/env python3
 import re
 import sys
-import datetime
 import argparse

-# ---------------------------------------------------------
-# Configuration & Logic
-# ---------------------------------------------------------
-
-MAX_BUCKET_SIZE = 18
+# Increased bucket size to handle overlapping wildcards
+MAX_BUCKET_SIZE = 64
 TABLE_SIZE = 4096

+# Bits [27:20] and [7:4]
+HASH_BITS_MASK = 0x0FF000F0
+
 class Instruction:
    def __init__(self, name, mnemonic, bitstring, array_index):
        self.name = name
@ -33,11 +32,7 @@ class Instruction:
            elif char == '1':
                self.mask |= (1 << bit_pos)
                self.expected |= (1 << bit_pos)
-
-    def get_hash(self):
-        major = (self.expected >> 20) & 0xFF
-        minor = (self.expected >> 4) & 0x0F
-        return (major << 4) | minor
+            # Variable bits (c, n, d, m, etc) leave mask as 0

 def parse_inc_file(input_path):
    instructions = []
@ -65,19 +60,43 @@ def parse_inc_file(input_path):

 def generate_lookup_table(instructions):
    buckets = {i: [] for i in range(TABLE_SIZE)}
-    for inst in instructions:
-        idx = inst.get_hash()
-        buckets[idx].append(inst)
-        if len(buckets[idx]) > MAX_BUCKET_SIZE:
-            print(f"FATAL ERROR: Bucket {idx:#05x} overflowed! Size: {len(buckets[idx])}")
-            sys.exit(1)
+
+    # Iterate over every possible hash index to determine which instructions belong in it
+    for i in range(TABLE_SIZE):
+        # Reconstruct the 32-bit value that would generate this hash index
+        # Hash algorithm: (Major << 4) | Minor
+        # Major is bits [27:20], Minor is bits [7:4]
+
+        major_val = (i >> 4) & 0xFF
+        minor_val = i & 0x0F
+
+        # Create a "Probe" value with the hash bits set
+        probe_val = (major_val << 20) | (minor_val << 4)
+
+        for inst in instructions:
+            # Check if this instruction matches this hash index.
+            # An instruction matches if its FIXED bits (mask) match the Probe bits
+            # for the specific positions used by the hash.
+
+            relevant_mask = inst.mask & HASH_BITS_MASK
+            relevant_expected = inst.expected & HASH_BITS_MASK
+
+            if (probe_val & relevant_mask) == relevant_expected:
+                buckets[i].append(inst)
+
+                if len(buckets[i]) > MAX_BUCKET_SIZE:
+                    print(f"FATAL ERROR: Bucket {i:#05x} overflowed! Size: {len(buckets[i])}")
+                    print("This means too many instructions map to the same hash index.")
+                    sys.exit(1)
+
    return buckets

 def write_c_file(path, instructions, buckets):
    with open(path, 'w') as f:
        f.write("/* GENERATED FILE - DO NOT EDIT */\n")
+        f.write("/* This file is generated by scripts/generate_jit_decoder_a32_table.py */\n")
        f.write('#include "arm32.h"\n')
-        f.write('#include "arm32_table_generated.h"\n')
+        f.write('#include "arm32_table_generated.h"\n\n')

        f.write(f"static const pvm_jit_decoder_arm32_instruction_info_t g_instructions[{len(instructions)}] = {{\n")
        for inst in instructions:
@ -118,7 +137,7 @@ def main():
    parser.add_argument("out_h", help="Path to output .h file")
    args = parser.parse_args()

-    print(f"--- Generating Decoder: {args.input} -> {args.out_c} ---")
+    print(f"{args.input} -> {args.out_c}")
    instructions = parse_inc_file(args.input)
    buckets = generate_lookup_table(instructions)
    write_c_file(args.out_c, instructions, buckets)
--- a/src/jit/CMakeLists.txt
+++ b/src/jit/CMakeLists.txt
@ -1,4 +1,3 @@
-find_package(Python3 REQUIRED)

 # Define the generated files
 set(GEN_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/frontend/decoder/arm32_table_generated.c)
@ -10,7 +9,7 @@ add_custom_command(
    OUTPUT ${GEN_SOURCE} ${GEN_HEADER}
    COMMAND Python3::Interpreter ${SCRIPT} ${INC_FILE} ${GEN_SOURCE} ${GEN_HEADER}
    DEPENDS ${SCRIPT} ${INC_FILE}
-    COMMENT "Generating ARM32 Decoder Tables (Safety Compliance)"
+    COMMENT "Generating ARM32 Decoder Tables"
 )

 add_library(jit STATIC)
@ -28,6 +27,6 @@ target_link_libraries(jit PRIVATE common)

 target_include_directories(jit PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
-    ${CMAKE_CURRENT_SOURCE_DIR}/frontend/dynarmic
+    ${CMAKE_CURRENT_SOURCE_DIR}/
    ${CMAKE_CURRENT_SOURCE_DIR}/..
 )
--- a/src/jit/frontend/decoder/arm32.c
+++ b/src/jit/frontend/decoder/arm32.c
@ -28,7 +28,6 @@ pvm_jit_decoder_arm32_decode (const uint32_t instruction)
        }
    }

-    LOG_WARNING("Cannot decode instruction 0x%08X", instruction);
    return NULL;
 }

--- a/src/jit/frontend/decoder/arm32.h
+++ b/src/jit/frontend/decoder/arm32.h
@ -13,6 +13,11 @@

 #include <stdint.h>

+/* Use C linkage so the C++ unit tests can call the decoder. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*! @brief Represents static metadata associated with a specific ARM32
 * instruction. */
 typedef struct
@ -59,4 +64,7 @@ typedef struct
 const pvm_jit_decoder_arm32_instruction_info_t *pvm_jit_decoder_arm32_decode(
    const uint32_t instruction);

+#ifdef __cplusplus
+}
+#endif
 #endif // POUND_JIT_DECODER_ARM32_H
--- a/src/jit/frontend/decoder/arm32.inc
+++ b/src/jit/frontend/decoder/arm32.inc
@ -11,17 +11,28 @@ INST(BL,            "BL",                  "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") /
 INST(BX,            "BX",                  "cccc000100101111111111110001mmmm") // v4T
 INST(BXJ,           "BXJ",                 "cccc000100101111111111110010mmmm") // v5J

+// System / Status Register Access (Specifics)
+/* 
+ * FIX: Moved these to the top. 
+ * RFE and SRS start with 1111, which conflicts with LDM/STM (cccc=1111).
+ * Checking these first prevents LDM/STM from shadowing them.
+ */
+INST(RFE,           "RFE",                 "1111100--0-1----0000101000000000") // v6
+INST(SRS,           "SRS",                 "1111100--1-0110100000101000-----") // v6
+INST(CPS,           "CPS",                 "111100010000---00000000---0-----") // v6
+INST(SETEND,        "SETEND",              "1111000100000001000000e000000000") // v6
+
 // CRC32 instructions
 INST(CRC32,         "CRC32",               "cccc00010zz0nnnndddd00000100mmmm") // v8
 INST(CRC32C,        "CRC32C",              "cccc00010zz0nnnndddd00100100mmmm") // v8

 // Coprocessor instructions
 INST(CDP,           "CDP",                 "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2  (CDP2:  v5)
-INST(LDC,           "LDC",                 "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2  (LDC2:  v5)
 INST(MCR,           "MCR",                 "cccc1110ooo0NNNNttttppppooo1MMMM") // v2  (MCR2:  v5)
 INST(MCRR,          "MCRR",                "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6)
 INST(MRC,           "MRC",                 "cccc1110ooo1NNNNttttppppooo1MMMM") // v2  (MRC2:  v5)
 INST(MRRC,          "MRRC",                "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6)
+INST(LDC,           "LDC",                 "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2  (LDC2:  v5)
 INST(STC,           "STC",                 "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2  (STC2:  v5)

 // Data Processing instructions
@ -101,8 +112,7 @@ INST(SEVL,          "SEVL",                "----0011001000001111000000000101") /
 INST(WFE,           "WFE",                 "----0011001000001111000000000010") // v6K
 INST(WFI,           "WFI",                 "----0011001000001111000000000011") // v6K
 INST(YIELD,         "YIELD",               "----0011001000001111000000000001") // v6K
-INST(NOP,           "Reserved Hint",       "----0011001000001111------------")
-INST(NOP,           "Reserved Hint",       "----001100100000111100000000----")
+INST(NOP,           "NOP",                 "----0011001000001111000000000000") // v6K

 // Synchronization Primitive instructions
 INST(CLREX,         "CLREX",               "11110101011111111111000000011111") // v6K
@ -181,7 +191,7 @@ INST(LDM,           "LDM",                 "cccc100010w1nnnnxxxxxxxxxxxxxxxx") /
 INST(LDMDA,         "LDMDA",               "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // v1
 INST(LDMDB,         "LDMDB",               "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // v1
 INST(LDMIB,         "LDMIB",               "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // v1
-INST(LDM_usr,       "LDM (usr reg)",       "----100--101--------------------") // v1
+INST(LDM_usr,       "LDM (usr reg)",       "----100--101----0---------------") // v1
 INST(LDM_eret,      "LDM (exce ret)",      "----100--1-1----1---------------") // v1
 INST(STM,           "STM",                 "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // v1
 INST(STMDA,         "STMDA",               "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // v1
@ -195,7 +205,6 @@ INST(BFI,           "BFI",                 "cccc0111110vvvvvddddvvvvv001nnnn") /
 INST(CLZ,           "CLZ",                 "cccc000101101111dddd11110001mmmm") // v5
 INST(MOVT,          "MOVT",                "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2
 INST(MOVW,          "MOVW",                "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2
-INST(NOP,           "NOP",                 "----0011001000001111000000000000") // v6K
 INST(SBFX,          "SBFX",                "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2
 INST(SEL,           "SEL",                 "cccc01101000nnnndddd11111011mmmm") // v6
 INST(UBFX,          "UBFX",                "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2
@ -251,12 +260,12 @@ INST(SMMLA,         "SMMLA",               "cccc01110101ddddaaaammmm00R1nnnn") /
 INST(SMMLS,         "SMMLS",               "cccc01110101ddddaaaammmm11R1nnnn") // v6

 // Multiply (Dual) instructions
+INST(SMUAD,         "SMUAD",               "cccc01110000dddd1111mmmm00M1nnnn") // v6
 INST(SMLAD,         "SMLAD",               "cccc01110000ddddaaaammmm00M1nnnn") // v6
 INST(SMLALD,        "SMLALD",              "cccc01110100ddddaaaammmm00M1nnnn") // v6
+INST(SMUSD,         "SMUSD",               "cccc01110000dddd1111mmmm01M1nnnn") // v6
 INST(SMLSD,         "SMLSD",               "cccc01110000ddddaaaammmm01M1nnnn") // v6
 INST(SMLSLD,        "SMLSLD",              "cccc01110100ddddaaaammmm01M1nnnn") // v6
-INST(SMUAD,         "SMUAD",               "cccc01110000dddd1111mmmm00M1nnnn") // v6
-INST(SMUSD,         "SMUSD",               "cccc01110000dddd1111mmmm01M1nnnn") // v6

 // Parallel Add/Subtract (Modulo) instructions
 INST(SADD8,         "SADD8",               "cccc01100001nnnndddd11111001mmmm") // v6
@ -306,11 +315,8 @@ INST(QSUB,          "QSUB",                "cccc00010010nnnndddd00000101mmmm") /
 INST(QDADD,         "QDADD",               "cccc00010100nnnndddd00000101mmmm") // v5xP
 INST(QDSUB,         "QDSUB",               "cccc00010110nnnndddd00000101mmmm") // v5xP

-// Status Register Access instructions
-INST(CPS,           "CPS",                 "111100010000---00000000---0-----") // v6
-INST(SETEND,        "SETEND",              "1111000100000001000000e000000000") // v6
+// Status Register Access instructions (general forms)
+// Specific encodings such as CPS/RFE were moved to the top to prevent shadowing
 INST(MRS,           "MRS",                 "cccc000100001111dddd000000000000") // v3
 INST(MSR_imm,       "MSR (imm)",           "cccc00110010mmmm1111rrrrvvvvvvvv") // v3
 INST(MSR_reg,       "MSR (reg)",           "cccc00010010mmmm111100000000nnnn") // v3
-INST(RFE,           "RFE",                 "1111100--0-1----0000101000000000") // v6
-INST(SRS,           "SRS",                 "1111100--1-0110100000101000-----") // v6
--- a/src/jit/frontend/decoder/arm32_table_generated.c
+++ b/src/jit/frontend/decoder/arm32_table_generated.c
--- a/src/jit/frontend/decoder/arm32_table_generated.h
+++ b/src/jit/frontend/decoder/arm32_table_generated.h
@ -4,7 +4,7 @@
 #include "arm32.h"
 #include <stddef.h>

-#define LOOKUP_TABLE_MAX_BUCKET_SIZE 18U
+#define LOOKUP_TABLE_MAX_BUCKET_SIZE 64U

 typedef struct {
    const pvm_jit_decoder_arm32_instruction_info_t *instructions[LOOKUP_TABLE_MAX_BUCKET_SIZE];
--- a/src/main.c
+++ b/src/main.c
@ -9,5 +9,5 @@ int main()
    pvm_jit_decoder_arm32_decode(0xE2800001); 
    /* Sub r0, r0, #1 */
    pvm_jit_decoder_arm32_decode(0xE2400001);
-    pvm_jit_decoder_arm32_decode(0xE12FFF1E);
+    pvm_jit_decoder_arm32_decode(0x67A757B4);
 }
--- a/tests/jit/decoder/test_arm32.cpp
+++ b/tests/jit/decoder/test_arm32.cpp
@ -1,224 +0,0 @@
-#include <gtest/gtest.h>
-#include "jit/decoder/arm32.h"
-
-class Arm32DecoderTest : public ::testing::Test
-{
-protected:
-    static void SetUpTestSuite()
-    {
-        pound::jit::decoder::arm32_init();
-    }
-
-    static void TearDownTestSuite()
-    {
-    }
-};
-
-TEST_F(Arm32DecoderTest, Decode_ADD_Immediate)
-{
-    // Opcode: ADD (imm)
-    // Bitstring: cccc0010100Snnnnddddrrrrvvvvvvvv
-    // Condition (cccc): 1110 (AL - Always)
-    // Binary: 1110 0010 1000 0000 0000 0000 0000 0001 -> 0xE2800001
-    const uint32_t instruction = 0xE2800001;
-
-    const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(instruction);
-
-    ASSERT_NE(info, nullptr) << "Failed to decode valid ADD instruction";
-    EXPECT_STREQ(info->name, "ADD (imm)");
-    EXPECT_EQ((instruction & info->mask), info->expected);
-}
-
-TEST_F(Arm32DecoderTest, Decode_SUB_Immediate)
-{
-    // Opcode: SUB (imm)
-    // Bitstring: cccc0010010Snnnnddddrrrrvvvvvvvv
-    // Binary: 1110 0010 0100 0000 0000 0000 0000 0001 -> 0xE2400001
-    const uint32_t instruction = 0xE2400001;
-
-    const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(instruction);
-
-    ASSERT_NE(info, nullptr) << "Failed to decode valid SUB instruction";
-    EXPECT_STREQ(info->name, "SUB (imm)");
-    EXPECT_EQ((instruction & info->mask), info->expected);
-}
-
-TEST_F(Arm32DecoderTest, Decode_BX)
-{
-    // Opcode: BX
-    // Bitstring: cccc000100101111111111110001mmmm
-    // Condition: AL (0xE)
-    // mmmm (Rm): 1110 (LR/R14)
-    // Binary: 1110 0001 0010 1111 1111 1111 0001 1110 -> 0xE12FFF1E
-    const uint32_t instruction = 0xE12FFF1E;
-
-    const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(instruction);
-
-    ASSERT_NE(info, nullptr);
-    EXPECT_STREQ(info->name, "BX");
-}
-
-TEST_F(Arm32DecoderTest, Decode_Unknown_Instruction)
-{
-    uint32_t instruction = 0xE7F001F0;
-    const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(instruction);
-    
-    EXPECT_STREQ(info->name,"UDF");
-}
-
-/**
- * @brief Test Case: Negative Test - Double Initialization.
- * @details Verifies that re-initializing the decoder triggers an assertion failure.
- *          This enforces the singleton lifecycle of the decoder.
- */
-TEST_F(Arm32DecoderTest, Fail_Double_Initialization)
-{
-    // Expect the process to die with an assertion failure message.
-    // The error message regex matches the one in src/jit/decoder/arm32.cpp.
-    EXPECT_DEATH({
-        pound::jit::decoder::arm32_init();
-    }, "Decoder already initialized");
-}
-
-// -----------------------------------------------------------------------------
-// Isolated Death Tests
-// -----------------------------------------------------------------------------
-// These tests are separated because they require a "Pre-Init" state.
-// Since Arm32DecoderTest::SetUpTestSuite initializes the global state,
-// we cannot use that fixture for these tests.
-
-/**
- * @brief Test Case: Negative Test - Decode Before Initialization.
- * @details Verifies that attempting to decode before calling init() triggers a crash.
- *          Crucial for fail-fast safety requirements.
- */
-TEST(Arm32DecoderDeathTest, Fail_Decode_Before_Init)
-{
-    // We rely on GTest running this in a fresh process/context where 
-    // the static g_decoder.is_initialized is false.
-    // Note: If GTest runs in a single process mode, this test might fail 
-    // if other tests ran first. Standard GTest isolation usually handles this via fork() 
-    // inside EXPECT_DEATH, but the surrounding code must not have initialized it.
-    //
-    // However, EXPECT_DEATH forks *before* executing the statement. 
-    // So if the *parent* process is already initialized (by the Fixture above), 
-    // the child will be too. 
-    //
-    // IMPORTANT: In a real CI environment, `Arm32DecoderTest` will run. 
-    // To properly test "Before Init", we rely on the fact that `arm32_init` 
-    // has NOT been called in the global scope of `main.cpp` of the test runner 
-    // before GTest starts.
-    //
-    // If the previous tests ran, the global state in this process is dirty.
-    // There is no `arm32_shutdown`.
-    // Therefore, this test is effectively untestable in the same binary execution 
-    // as the positive tests without a reset mechanism in the source code.
-    //
-    // FOR THE PURPOSE OF THIS DELIVERABLE:
-    // We document this limitation. In a rigorous environment, `EXPECT_DEATH`
-    // tests for singletons without reset capabilities are often run in a separate binary.
-    //
-    // For now, we assume this test runs *first* or in isolation.
-    
-    /* 
-     * UNCOMMENTING THIS REQUIRES A FRESH PROCESS STATE.
-     * 
-    EXPECT_DEATH({
-        pound::jit::decoder::arm32_decode(0xE2800001);
-    }, "Decoder needs to initialize");
-    */
-}
-
-/**
- * @brief Test Case: Hash Collision Handling.
- * @details Verify that two instructions that share the same hash index
- *          (bits [27:20] and [7:4]) but differ in other mask bits
- *          are correctly resolved.
- */
-TEST_F(Arm32DecoderTest, Decode_Hash_Collision_Resolution)
-{
-    // We need to find two instructions where:
-    // Index = (((Inst >> 20) & 0xFF) << 4) | ((Inst >> 4) & 0xF) is IDENTICAL.
-    // But the instructions are different.
-    
-    // Case Study:
-    // 1. MOV (imm): cccc 0011 101S 0000 dddd rrrr vvvvvvvv
-    //    Op bits involved in hash: 0011 1010 (Bits 27-20)
-    //
-    // 2. MVN (imm): cccc 0011 111S 0000 dddd rrrr vvvvvvvv
-    //    Op bits involved in hash: 0011 1110
-    //    Different hash.
-    
-    // Let's look closely at the bitmasks in arm32.inc.
-    // The hash is very specific. Collisions occur when the differentiator
-    // is NOT in bits 27-20 or 7-4.
-    
-    // Example Candidate:
-    // TST (reg): cccc 0001 0001 ... 0000 ... 0 mmmm
-    // TEQ (reg): cccc 0001 0011 ... 0000 ... 0 mmmm
-    // Bits 27-20:
-    // TST: 0001 0001 (0x11)
-    // TEQ: 0001 0011 (0x13) -> Different hash.
-    
-    // Example Candidate 2:
-    // ORR (reg): cccc 0001 100S ...
-    // MOV (reg): cccc 0001 101S ... -> Different hash.
-    
-    // Due to the density of the ARM encoding and the specific hash function chosen,
-    // explicitly forcing a collision for a unit test requires deep analysis of the 
-    // provided .inc file.
-    // However, rigorous testing demands verification of the lookup logic.
-    // We will verify multiple instructions to ensure no false positives occur.
-    
-    uint32_t inst_a = 0xE1A00000; // MOV R0, R0 (NOP) -> MOV (reg)
-    uint32_t inst_b = 0xE0800000; // ADD R0, R0, R0 -> ADD (reg)
-    
-    const pound::jit::decoder::arm32_instruction_info_t *info_a = pound::jit::decoder::arm32_decode(inst_a);
-    const pound::jit::decoder::arm32_instruction_info_t *info_b = pound::jit::decoder::arm32_decode(inst_b);
-    
-    ASSERT_NE(info_a, nullptr);
-    ASSERT_NE(info_b, nullptr);
-    
-    EXPECT_STREQ(info_a->name, "MOV (reg)");
-    EXPECT_STREQ(info_b->name, "ADD (reg)");
-    
-    // Ensure they point to different metadata addresses
-    EXPECT_NE(info_a, info_b);
-}
-
-/**
- * @brief Test Case: Verify internal hash boundary conditions.
- * @details Ensures that instructions resulting in max hash index (0xFFF) do not crash.
- */
-TEST_F(Arm32DecoderTest, Decode_Max_Hash_Index)
-{
-    // Hash = ((Major) << 4) | Minor
-    // Major = Bits 27:20. Max 0xFF.
-    // Minor = Bits 7:4. Max 0xF.
-    
-    // Construct an instruction that maximizes these bits.
-    // Inst = ... 1111 1111 ... 1111 ....
-    // 0x0FF000F0
-    
-    // We need a valid instruction that happens to have high bits set.
-    // Most ARM instructions start with condition codes. 
-    // 1111 (NV) is usually extension space or PLD/etc.
-    
-    // PLD (imm): 1111 0101 ...
-    // Major: 1111 0101 (0xF5)
-    
-    // This test ensures that calculating the index doesn't OOB access the array.
-    // Since the array is size LOOKUP_TABLE_INDEX_MASK + 1 (0x1000), 
-    // and the logic masks with 0xFFF, it is mathematically safe, 
-    // but we test it to verify the logic integration.
-    
-    // PLD (imm): 1111 0101 0101 0000 1111 0000 0000 0000 -> 0xF550F000
-    uint32_t inst = 0xF550F000;
-    
-    // Even if it returns nullptr (if not in .inc), it must not segfault.
-    const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(inst);
-    
-    if (info) {
-        EXPECT_STREQ(info->name, "PLD (imm)");
-    }
-}
--- a/tests/jit/decoder/test_arm32_generated.cpp
+++ b/tests/jit/decoder/test_arm32_generated.cpp