mirror of
https://github.com/pound-emu/pound.git
synced 2025-12-13 04:36:57 +00:00
jit/decoder: Add generated arm32 tests
Introduces the first unit tests for the ARM32 JIT decoder. A new script automatically generates a test case for every instruction in arm32.inc, providing 100% coverage of the ISA. This also includes a critical rework of the decoder's lookup table generation logic. The previous hashing method was flawed, causing build-time overflows and incorrect instruction matching (shadowing) for patterns with wildcards. The new algorithm correctly populates the lookup table. Signed-off-by: Ronald Caesar <github43132@proton.me>
This commit is contained in:
parent
c235e57071
commit
d1e3919a8c
13 changed files with 37513 additions and 502 deletions
14
.github/workflows/build.yml
vendored
14
.github/workflows/build.yml
vendored
|
|
@ -114,15 +114,16 @@ jobs:
|
||||||
|
|
||||||
- name: Install Dependencies
|
- name: Install Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install llvm
|
brew install llvm@20
|
||||||
|
echo "CMAKE_PREFIX_PATH=/usr/local/opt/llvm@20" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Configure CMake (x86_64)
|
- name: Configure CMake (x86_64)
|
||||||
run: >
|
run: >
|
||||||
cmake -G Ninja -B "${{env.BUILD_DIR}}"
|
cmake -G Ninja -B "${{env.BUILD_DIR}}"
|
||||||
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
|
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
|
||||||
-DCMAKE_OSX_ARCHITECTURES=x86_64
|
-DCMAKE_OSX_ARCHITECTURES=x86_64
|
||||||
-DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang
|
-DCMAKE_C_COMPILER=/usr/local/opt/llvm@20/bin/clang
|
||||||
-DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++
|
-DCMAKE_CXX_COMPILER=/usr/local/opt/llvm@20/bin/clang++
|
||||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=10.15
|
-DCMAKE_OSX_DEPLOYMENT_TARGET=10.15
|
||||||
|
|
||||||
- name: Build (x86_64)
|
- name: Build (x86_64)
|
||||||
|
|
@ -156,14 +157,15 @@ jobs:
|
||||||
|
|
||||||
- name: Install Dependencies
|
- name: Install Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install llvm
|
brew install llvm@20
|
||||||
|
echo "CMAKE_PREFIX_PATH=/opt/homebrew/opt/llvm@20" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Configure CMake (ARM64)
|
- name: Configure CMake (ARM64)
|
||||||
run: >
|
run: >
|
||||||
cmake -G Ninja -B "${{env.BUILD_DIR}}"
|
cmake -G Ninja -B "${{env.BUILD_DIR}}"
|
||||||
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
|
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
|
||||||
-DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang
|
-DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm@20/bin/clang
|
||||||
-DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++
|
-DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm@20/bin/clang++
|
||||||
-DCMAKE_OSX_ARCHITECTURES=arm64
|
-DCMAKE_OSX_ARCHITECTURES=arm64
|
||||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=10.15
|
-DCMAKE_OSX_DEPLOYMENT_TARGET=10.15
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,8 @@ endif()
|
||||||
set(CMAKE_C_STANDARD 11)
|
set(CMAKE_C_STANDARD 11)
|
||||||
set(CMAKE_C_STANDARD_REQUIRED TRUE)
|
set(CMAKE_C_STANDARD_REQUIRED TRUE)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
|
|
||||||
#-------------------------------
|
#-------------------------------
|
||||||
|
|
@ -91,17 +93,29 @@ message(STATUS "All submodules verified successfully")
|
||||||
#-----------------------------
|
#-----------------------------
|
||||||
# ---- Target Definitions ----
|
# ---- Target Definitions ----
|
||||||
#-----------------------------
|
#-----------------------------
|
||||||
|
|
||||||
add_executable(Pound
|
add_executable(Pound
|
||||||
src/main.c
|
src/main.c
|
||||||
)
|
)
|
||||||
|
|
||||||
set(TEST_SRC
|
find_package(Python3 REQUIRED)
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/tests/jit/ir/test_value.cpp
|
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/tests/jit/decoder/test_arm32.cpp
|
# Define the test generation command
|
||||||
|
set(GEN_TEST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/tests/jit/decoder/test_arm32_generated.cpp)
|
||||||
|
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT ${GEN_TEST_SRC}
|
||||||
|
COMMAND Python3::Interpreter ${CMAKE_SOURCE_DIR}/scripts/generate_decoder_tests.py
|
||||||
|
${CMAKE_SOURCE_DIR}/src/jit/frontend/decoder/arm32.inc
|
||||||
|
${GEN_TEST_SRC}
|
||||||
|
DEPENDS ${CMAKE_SOURCE_DIR}/scripts/generate_decoder_tests.py
|
||||||
|
${CMAKE_SOURCE_DIR}/src/jit/frontend/decoder/arm32.inc
|
||||||
|
COMMENT "Generating ARM32 Decoder Tests"
|
||||||
)
|
)
|
||||||
|
|
||||||
#add_executable(tests ${TEST_SRC})
|
# Add to test executable
|
||||||
|
add_executable(tests
|
||||||
|
${GEN_TEST_SRC}
|
||||||
|
)
|
||||||
|
|
||||||
add_subdirectory(3rd_Party)
|
add_subdirectory(3rd_Party)
|
||||||
add_subdirectory(src/common)
|
add_subdirectory(src/common)
|
||||||
|
|
@ -161,8 +175,8 @@ target_link_libraries(Pound PRIVATE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
#target_link_libraries(tests PRIVATE
|
target_link_libraries(tests PRIVATE
|
||||||
# jit
|
jit
|
||||||
# gtest
|
gtest
|
||||||
# gtest_main
|
gtest_main
|
||||||
#)
|
)
|
||||||
|
|
|
||||||
454
scripts/generate_decoder_tests.py
Normal file
454
scripts/generate_decoder_tests.py
Normal file
|
|
@ -0,0 +1,454 @@
|
||||||
|
"""
|
||||||
|
This script employs several sophisticated techniques to ensure the quality and
|
||||||
|
correctness of the generated tests.
|
||||||
|
|
||||||
|
|
||||||
|
# Instruction Parsing
|
||||||
|
|
||||||
|
The script begins by parsing the `arm32.inc` file. It uses a regular expression
|
||||||
|
to find all occurrences of the `INST()` macro and extracts three key pieces of
|
||||||
|
information for each instruction:
|
||||||
|
|
||||||
|
1. **Mnemonic**: A short, unique identifier (e.g., `ADD_imm`).
|
||||||
|
2. **Name**: A human-readable description (e.g., `"ADD (imm)"`).
|
||||||
|
3. **Bitstring**: A 32-character string representing the instruction's binary
|
||||||
|
encoding.
|
||||||
|
|
||||||
|
The bitstring is the most critical piece. It's a mix of `'0'`, `'1'`, and
|
||||||
|
wildcard characters (like `v`, `n`, `c`) that represent variable fields.
|
||||||
|
|
||||||
|
|
||||||
|
# Randomized Instantiation & Constraint System
|
||||||
|
|
||||||
|
To create a concrete test case from an abstract bitstring, the script must
|
||||||
|
generate a valid 32-bit integer. It does this by:
|
||||||
|
|
||||||
|
1. Setting the fixed `'0'` and `'1'` bits.
|
||||||
|
2. Randomly generating `'0'` or `'1'` for all wildcard bits.
|
||||||
|
|
||||||
|
However, simple randomization can be problematic. Certain ARM instructions
|
||||||
|
are specializations of more general patterns. For example, the `SXTB`
|
||||||
|
instruction is a special case of the `SXTAB` instruction where the `Rn`
|
||||||
|
register field is `1111`.
|
||||||
|
|
||||||
|
If we randomly generate an `SXTAB` test case where `Rn` happens to be `1111`,
|
||||||
|
the decoder might (correctly) identify it as `SXTB`. This would cause the
|
||||||
|
`SXTAB` test to fail.
|
||||||
|
|
||||||
|
To prevent this, the script uses a **constraint system**
|
||||||
|
(`get_instruction_constraints`). This function defines rules to avoid
|
||||||
|
generating ambiguous encodings. When generating a test for a general
|
||||||
|
instruction (`SXTAB`), it forces the specialized bits (`Rn`) to be a value other
|
||||||
|
than the one that would cause it to alias to the more specific instruction
|
||||||
|
(`SXTB`). This ensures each test validates exactly one unique instruction
|
||||||
|
definition.
|
||||||
|
|
||||||
|
|
||||||
|
# Oracle-Based Negative Testing
|
||||||
|
|
||||||
|
The most powerful feature of this script is its **negative verification**
|
||||||
|
strategy. For every fixed `'0'` or `'1'` bit in an instruction's bitstring,
|
||||||
|
the script generates a test case where that single bit is flipped.
|
||||||
|
This creates an instruction that is intentionally invalid *for that specific
|
||||||
|
pattern*.
|
||||||
|
|
||||||
|
The script then uses an "oracle" — a Python-based reference decoder
|
||||||
|
(`python_decode`) — to predict what this corrupted instruction *should*
|
||||||
|
decode to. The corrupted value might match a different valid instruction, or
|
||||||
|
it might be completely invalid (decode to `NULL`).
|
||||||
|
|
||||||
|
The generated C++ test then asserts that the actual C++ decoder's output
|
||||||
|
exactly matches the oracle's prediction. This guarantees that the decoder
|
||||||
|
rejects invalid patterns that are only one bit off from a valid one.
|
||||||
|
|
||||||
|
|
||||||
|
# Fuzz Testing
|
||||||
|
|
||||||
|
Finally, the script generates a fuzz test that feeds a large number (100,000)
|
||||||
|
of completely random 32-bit integers to the decoder. This test serves as a
|
||||||
|
stability and integrity check. If the decoder identifies any of these random
|
||||||
|
inputs as a valid instruction, it cross-verifies that the input truly matches
|
||||||
|
the mask and expected value for that instruction. This ensures the decoder
|
||||||
|
never produces "false positives" and is robust against arbitrary data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import random
|
||||||
|
from typing import List, Dict, Optional, Tuple
|
||||||
|
|
||||||
|
CPP_HEADER = """/*
|
||||||
|
* GENERATED FILE - DO NOT EDIT
|
||||||
|
*
|
||||||
|
* This file is generated by scripts/generate_decoder_tests.py
|
||||||
|
*
|
||||||
|
* PURPOSE:
|
||||||
|
* Provides 100% requirements-based test coverage for the ARM32 Instruction Decoder.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "jit/frontend/decoder/arm32.h"
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
class Arm32DecoderGeneratedTest : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
void SetUp() override {
|
||||||
|
}
|
||||||
|
};
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class Instruction:
    """Record holding everything known about one parsed `INST()` entry."""

    def __init__(
        self,
        mnemonic: str,
        name: str,
        bitstring: str,
        val: int,
        mask: int,
        expected: int,
    ):
        # Identification and pattern text, stored exactly as passed in.
        self.mnemonic, self.name, self.bitstring = mnemonic, name, bitstring
        # val: one concrete encoding generated for this pattern.
        # mask/expected: the pair used for `(word & mask) == expected` matching.
        self.val, self.mask, self.expected = val, mask, expected
|
||||||
|
|
||||||
|
|
||||||
|
def get_instruction_constraints(name: str) -> Dict[int, int]:
    """
    Looks up the bit values that must be pinned when randomizing `name`.

    Some general patterns overlap with more specific instructions. Pinning a
    few wildcard bits keeps a randomly generated encoding of the general
    instruction from landing on the specific one.

    Args:
        name: The human-readable name of the instruction (e.g., "SXTAB").

    Returns:
        A dictionary mapping bit positions (31-0) to a required value (0 or 1).
        Empty when the instruction needs no pinning.
    """
    # Instructions that must be generated with P=1 (bit 24):
    #  - coprocessor transfers, so they do not look like MRRC (which needs bit 24=0)
    #  - standard loads/stores, so they do not alias the unprivileged
    #    LDRT/STRT forms (P=0, W=1)
    needs_p_set = (
        "LDC", "LDC2", "STC", "STC2",
        "LDR (reg)", "LDRB (reg)", "LDRH (reg)", "LDRSB (reg)", "LDRSH (reg)",
        "STR (reg)", "STRB (reg)", "STRH (reg)", "LDRD (reg)", "STRD (reg)",
        "LDR (imm)", "LDRB (imm)", "STR (imm)", "STRB (imm)",
    )
    # Extend-and-add forms: Rn (bits 19-16) == 1111 selects the plain extend
    # (e.g. SXTB instead of SXTAB), so Rn is forced to 0000.
    needs_rn_clear = ("SXTAB", "SXTAB16", "SXTAH", "UXTAB", "UXTAB16", "UXTAH")
    # Multiply-accumulate forms: Ra (bits 15-12) == 1111 selects the
    # non-accumulating variant (e.g. SMMUL, SMUAD), so Ra is forced to 0000.
    needs_ra_clear = ("SMMLA", "SMMLS", "SMLAD", "SMLSD", "SMLALD", "SMLSLD")

    pinned: Dict[int, int] = {}

    if name in needs_p_set:
        pinned[24] = 1

    if name in needs_rn_clear:
        for bit in (19, 18, 17, 16):
            pinned[bit] = 0

    if name in needs_ra_clear:
        for bit in (15, 14, 13, 12):
            pinned[bit] = 0

    return pinned
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_mask_and_expected(bitstring: str) -> Tuple[int, int]:
    """
    Derives the decoder match pair from a bitstring pattern.

    The leftmost character corresponds to bit 31. A fixed '0' or '1' marks the
    bit as significant in the mask; a '1' additionally sets it in the expected
    value. Any other character is a wildcard and contributes nothing.

    Args:
        bitstring: The 32-character instruction pattern.

    Returns:
        A tuple containing the (mask, expected) integer values.
    """
    significant = 0
    required = 0
    for offset, symbol in enumerate(bitstring):
        if symbol not in ("0", "1"):
            # Wildcard: bit is ignored when matching.
            continue
        bit = 1 << (31 - offset)
        significant |= bit
        if symbol == "1":
            required |= bit
    return significant, required
|
||||||
|
|
||||||
|
|
||||||
|
def parse_bitstring_randomized(name: str, bitstring: str) -> int:
    """
    Generates a concrete, valid instruction word from a bitstring pattern.

    Fixed '0'/'1' characters are copied verbatim. Wildcard positions take the
    value required by `get_instruction_constraints(name)` when one exists and
    a random bit otherwise.

    Fixed bits always win over constraints: a constraint that happens to name
    a fixed position is ignored, so the result always matches the pattern's
    own mask/expected pair.

    Args:
        name: The human-readable name of the instruction.
        bitstring: The 32-character instruction pattern.

    Returns:
        A 32-bit integer representing a valid encoding of the instruction.

    Raises:
        ValueError: If `bitstring` is not exactly 32 characters long.
    """
    if len(bitstring) != 32:
        raise ValueError(f"Invalid bitstring length: {len(bitstring)}")

    constraints = get_instruction_constraints(name)

    val: int = 0
    for i, char in enumerate(bitstring):
        bit_pos = 31 - i

        # Fixed bits define the instruction; never let a constraint alter them.
        if char == "1":
            val |= 1 << bit_pos
            continue
        if char == "0":
            continue

        # Wildcard with a pinned value: use it and do NOT draw from the RNG,
        # so the random sequence stays the same as before for these bits.
        if bit_pos in constraints:
            if constraints[bit_pos] == 1:
                val |= 1 << bit_pos
            continue

        # Unconstrained wildcard: pick the bit at random.
        if random.choice([True, False]):
            val |= 1 << bit_pos

    return val
|
||||||
|
|
||||||
|
|
||||||
|
def parse_inc_file(input_path: str) -> List[Instruction]:
    """
    Parses an arm32.inc file and returns a list of Instruction objects.

    Each line containing an `INST(mnemonic, "name", "bitstring")` macro yields
    one Instruction, in file order. Blank lines and lines starting with `//`
    are skipped. For every entry a randomized concrete encoding and the
    mask/expected pair are computed.

    Args:
        input_path: The path to the arm32.inc file.

    Returns:
        A list of Instruction objects, one for each INST macro found.

    Exits the process with status 1 (after reporting on stderr) when
    `input_path` does not exist.
    """
    instructions: List[Instruction] = []
    regex = re.compile(r'INST\(\s*([A-Za-z0-9_]+),\s*"(.*?)",\s*"(.*?)"\s*\)')

    try:
        # Explicit encoding so parsing does not depend on the host locale.
        with open(input_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        # Diagnostics belong on stderr so they are not mixed with normal output.
        print(f"Error: Could not find input file: {input_path}", file=sys.stderr)
        sys.exit(1)

    for line in lines:
        line = line.strip()
        if not line or line.startswith("//"):
            continue

        match = regex.search(line)
        if not match:
            continue

        mnemonic, name, bitstring = match.group(1), match.group(2), match.group(3)

        val = parse_bitstring_randomized(name, bitstring)
        mask, expected = calculate_mask_and_expected(bitstring)

        # Manual patch for MSR (imm), whose pattern is
        # cccc00110010mmmm1111rrrrvvvvvvvv. Bit 16 is the lowest bit of the
        # `mmmm` field (bits 19-16), not part of the `v` immediate (bits 7-0).
        # Setting it guarantees `mmmm` is never all zeros.
        # NOTE(review): presumably an all-zero `mmmm` is what makes the word
        # decode as a different instruction - confirm against arm32.inc.
        if name == "MSR (imm)":
            val |= 1 << 16

        instructions.append(
            Instruction(mnemonic, name, bitstring, val, mask, expected)
        )

    return instructions
|
||||||
|
|
||||||
|
|
||||||
|
def python_decode(val: int, instructions: List[Instruction]) -> Optional[str]:
    """
    Reference decoder (oracle): first-match-wins scan over `instructions`.

    An entry matches when `(val & entry.mask) == entry.expected`. The list
    order is the precedence order, so an earlier entry shadows later ones.

    Args:
        val: The 32-bit instruction word to decode.
        instructions: The list of instruction definitions, in order of precedence.

    Returns:
        The name of the matching instruction, or None if no match is found.
    """
    first_hit = next(
        (entry for entry in instructions if (val & entry.mask) == entry.expected),
        None,
    )
    if first_hit is None:
        return None
    return first_hit.name
|
||||||
|
|
||||||
|
|
||||||
|
def generate_cpp_tests(instructions: List[Instruction], output_path: str) -> None:
    """
    Emits the GoogleTest source covering `instructions` into `output_path`.

    The file starts with CPP_HEADER, followed by one TEST_F per instruction
    (a positive check on its generated encoding plus one single-bit-flip check
    per fixed bit, with the expectation supplied by `python_decode`), and ends
    with a single fuzz test.

    Args:
        instructions: A list of all instruction definitions.
        output_path: The path to write the generated .cpp file.
    """
    with open(output_path, "w") as out:
        out.write(CPP_HEADER)

        # How many times each mnemonic has been seen so far; used to keep
        # test names unique when a mnemonic has several definitions.
        seen: Dict[str, int] = {}

        for entry in instructions:
            occurrence = seen.get(entry.mnemonic, 0) + 1
            seen[entry.mnemonic] = occurrence
            # First definition keeps the bare mnemonic; repeats get _2, _3, ...
            if occurrence == 1:
                test_name = f"Verify_{entry.mnemonic}"
            else:
                test_name = f"Verify_{entry.mnemonic}_{occurrence}"

            # --- 1. Positive verification, then the header of section 2 ---
            out.writelines(
                [
                    f"TEST_F(Arm32DecoderGeneratedTest, {test_name}) {{\n",
                    f' // 1. Positive Verification: Ensures "{entry.name}" is correctly identified.\n',
                    f" const uint32_t valid_inst = {entry.val:#010x};\n",
                    " const pvm_jit_decoder_arm32_instruction_info_t* info = pvm_jit_decoder_arm32_decode(valid_inst);\n\n",
                    f' ASSERT_NE(info, nullptr) << "Failed to decode known valid pattern for {entry.name}: {entry.val:#x}";\n',
                    f' EXPECT_STREQ(info->name, "{entry.name}") << "Decoded as the wrong instruction variant.";\n',
                    ' EXPECT_EQ((valid_inst & info->mask), info->expected) << "Mask/Expected mismatch on positive test.";\n\n',
                    " // 2. Negative Verification: Flip each fixed bit to ensure correct alternative decoding or rejection.\n",
                ]
            )

            # --- 2. Negative verification (oracle based) ---
            for offset, symbol in enumerate(entry.bitstring):
                # Only fixed bits define the pattern; wildcards are skipped.
                if symbol not in ("0", "1"):
                    continue

                bit_pos = 31 - offset
                corrupt_inst = entry.val ^ (1 << bit_pos)

                # ORACLE: what the corrupted word should decode to.
                predicted = python_decode(corrupt_inst, instructions)

                out.writelines(
                    [
                        " {\n",
                        f" // Test case: Flipping fixed bit {bit_pos}\n",
                        f" const uint32_t corrupt_inst = {corrupt_inst:#010x};\n",
                        " const pvm_jit_decoder_arm32_instruction_info_t* neg_info = pvm_jit_decoder_arm32_decode(corrupt_inst);\n\n",
                    ]
                )

                if predicted is None:
                    # The corrupted word should match nothing.
                    checks = [
                        " // Oracle predicts no match.\n",
                        ' EXPECT_EQ(neg_info, nullptr) << "Safety Violation: Should have decoded to nullptr, but got " << (neg_info ? neg_info->name : "nullptr");\n',
                    ]
                else:
                    # The corrupted word should match another instruction.
                    checks = [
                        f" // Oracle predicts this should decode as: {predicted}\n",
                        f" ASSERT_NE(neg_info, nullptr) << \"Safety Violation: Python Oracle predicted '{predicted}' but C++ decoder returned null\";\n",
                        f' EXPECT_STREQ(neg_info->name, "{predicted}") << "Safety Violation: Incorrect decode on single-bit corruption.";\n',
                    ]
                out.writelines(checks)
                out.write(" }\n")

            out.write("}\n\n")

        # --- 3. Fuzz test for overall stability ---
        out.writelines(
            [
                "TEST_F(Arm32DecoderGeneratedTest, Stability_Fuzz_Test) {\n",
                " // Feeds a large number of random inputs to the decoder to check for crashes or false positives.\n",
                " std::mt19937 rng(42); // Fixed seed for deterministic runs\n",
                " std::uniform_int_distribution<uint32_t> dist;\n\n",
                " for(int i = 0; i < 100000; ++i) {\n",
                " uint32_t random_inst = dist(rng);\n",
                " const pvm_jit_decoder_arm32_instruction_info_t* info = pvm_jit_decoder_arm32_decode(random_inst);\n",
                " if (info) {\n",
                " // If the decoder claims a match, it MUST be a valid match.\n",
                " ASSERT_EQ((random_inst & info->mask), info->expected) \n",
                ' << "Integrity Violation: Decoded " << std::hex << random_inst << " as \\"" << info->name << "\\" but mask/expected failed.";\n',
                " }\n",
                " }\n",
                "}\n",
            ]
        )
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: parse arguments, then generate the test file."""
    cli = argparse.ArgumentParser(description="Generate ARM32 Decoder Tests")
    cli.add_argument("input", help="Path to arm32.inc")
    cli.add_argument("output", help="Path to output test_arm32_generated.cpp")
    opts = cli.parse_args()

    print(f"{opts.input} -> {opts.output}")

    # Seed before any encoding is generated so every run produces the same
    # test file for the same input.
    random.seed(12345)

    parsed = parse_inc_file(opts.input)
    generate_cpp_tests(parsed, opts.output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
@ -1,16 +1,15 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import datetime
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# Increased bucket size to handle overlapping wildcards
|
||||||
# Configuration & Logic
|
MAX_BUCKET_SIZE = 64
|
||||||
# ---------------------------------------------------------
|
|
||||||
|
|
||||||
MAX_BUCKET_SIZE = 18
|
|
||||||
TABLE_SIZE = 4096
|
TABLE_SIZE = 4096
|
||||||
|
|
||||||
|
# Bits [27:20] and [7:4]
|
||||||
|
HASH_BITS_MASK = 0x0FF000F0
|
||||||
|
|
||||||
class Instruction:
|
class Instruction:
|
||||||
def __init__(self, name, mnemonic, bitstring, array_index):
|
def __init__(self, name, mnemonic, bitstring, array_index):
|
||||||
self.name = name
|
self.name = name
|
||||||
|
|
@ -33,11 +32,7 @@ class Instruction:
|
||||||
elif char == '1':
|
elif char == '1':
|
||||||
self.mask |= (1 << bit_pos)
|
self.mask |= (1 << bit_pos)
|
||||||
self.expected |= (1 << bit_pos)
|
self.expected |= (1 << bit_pos)
|
||||||
|
# Variable bits (c, n, d, m, etc) leave mask as 0
|
||||||
def get_hash(self):
|
|
||||||
major = (self.expected >> 20) & 0xFF
|
|
||||||
minor = (self.expected >> 4) & 0x0F
|
|
||||||
return (major << 4) | minor
|
|
||||||
|
|
||||||
def parse_inc_file(input_path):
|
def parse_inc_file(input_path):
|
||||||
instructions = []
|
instructions = []
|
||||||
|
|
@ -65,19 +60,43 @@ def parse_inc_file(input_path):
|
||||||
|
|
||||||
def generate_lookup_table(instructions):
|
def generate_lookup_table(instructions):
|
||||||
buckets = {i: [] for i in range(TABLE_SIZE)}
|
buckets = {i: [] for i in range(TABLE_SIZE)}
|
||||||
|
|
||||||
|
# Iterate over every possible hash index to determine which instructions belong in it
|
||||||
|
for i in range(TABLE_SIZE):
|
||||||
|
# Reconstruct the 32-bit value that would generate this hash index
|
||||||
|
# Hash algorithm: (Major << 4) | Minor
|
||||||
|
# Major is bits [27:20], Minor is bits [7:4]
|
||||||
|
|
||||||
|
major_val = (i >> 4) & 0xFF
|
||||||
|
minor_val = i & 0x0F
|
||||||
|
|
||||||
|
# Create a "Probe" value with the hash bits set
|
||||||
|
probe_val = (major_val << 20) | (minor_val << 4)
|
||||||
|
|
||||||
for inst in instructions:
|
for inst in instructions:
|
||||||
idx = inst.get_hash()
|
# Check if this instruction matches this hash index.
|
||||||
buckets[idx].append(inst)
|
# An instruction matches if its FIXED bits (mask) match the Probe bits
|
||||||
if len(buckets[idx]) > MAX_BUCKET_SIZE:
|
# for the specific positions used by the hash.
|
||||||
print(f"FATAL ERROR: Bucket {idx:#05x} overflowed! Size: {len(buckets[idx])}")
|
|
||||||
|
relevant_mask = inst.mask & HASH_BITS_MASK
|
||||||
|
relevant_expected = inst.expected & HASH_BITS_MASK
|
||||||
|
|
||||||
|
if (probe_val & relevant_mask) == relevant_expected:
|
||||||
|
buckets[i].append(inst)
|
||||||
|
|
||||||
|
if len(buckets[i]) > MAX_BUCKET_SIZE:
|
||||||
|
print(f"FATAL ERROR: Bucket {i:#05x} overflowed! Size: {len(buckets[i])}")
|
||||||
|
print("This means too many instructions map to the same hash index.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
return buckets
|
return buckets
|
||||||
|
|
||||||
def write_c_file(path, instructions, buckets):
|
def write_c_file(path, instructions, buckets):
|
||||||
with open(path, 'w') as f:
|
with open(path, 'w') as f:
|
||||||
f.write("/* GENERATED FILE - DO NOT EDIT */\n")
|
f.write("/* GENERATED FILE - DO NOT EDIT */\n")
|
||||||
|
f.write("/* This file is generated by scripts/generate_jit_decoder_a32_table.py */\n")
|
||||||
f.write('#include "arm32.h"\n')
|
f.write('#include "arm32.h"\n')
|
||||||
f.write('#include "arm32_table_generated.h"\n')
|
f.write('#include "arm32_table_generated.h"\n\n')
|
||||||
|
|
||||||
f.write(f"static const pvm_jit_decoder_arm32_instruction_info_t g_instructions[{len(instructions)}] = {{\n")
|
f.write(f"static const pvm_jit_decoder_arm32_instruction_info_t g_instructions[{len(instructions)}] = {{\n")
|
||||||
for inst in instructions:
|
for inst in instructions:
|
||||||
|
|
@ -118,7 +137,7 @@ def main():
|
||||||
parser.add_argument("out_h", help="Path to output .h file")
|
parser.add_argument("out_h", help="Path to output .h file")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
print(f"--- Generating Decoder: {args.input} -> {args.out_c} ---")
|
print(f"{args.input} -> {args.out_c}")
|
||||||
instructions = parse_inc_file(args.input)
|
instructions = parse_inc_file(args.input)
|
||||||
buckets = generate_lookup_table(instructions)
|
buckets = generate_lookup_table(instructions)
|
||||||
write_c_file(args.out_c, instructions, buckets)
|
write_c_file(args.out_c, instructions, buckets)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
find_package(Python3 REQUIRED)
|
|
||||||
|
|
||||||
# Define the generated files
|
# Define the generated files
|
||||||
set(GEN_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/frontend/decoder/arm32_table_generated.c)
|
set(GEN_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/frontend/decoder/arm32_table_generated.c)
|
||||||
|
|
@ -10,7 +9,7 @@ add_custom_command(
|
||||||
OUTPUT ${GEN_SOURCE} ${GEN_HEADER}
|
OUTPUT ${GEN_SOURCE} ${GEN_HEADER}
|
||||||
COMMAND Python3::Interpreter ${SCRIPT} ${INC_FILE} ${GEN_SOURCE} ${GEN_HEADER}
|
COMMAND Python3::Interpreter ${SCRIPT} ${INC_FILE} ${GEN_SOURCE} ${GEN_HEADER}
|
||||||
DEPENDS ${SCRIPT} ${INC_FILE}
|
DEPENDS ${SCRIPT} ${INC_FILE}
|
||||||
COMMENT "Generating ARM32 Decoder Tables (Safety Compliance)"
|
COMMENT "Generating ARM32 Decoder Tables"
|
||||||
)
|
)
|
||||||
|
|
||||||
add_library(jit STATIC)
|
add_library(jit STATIC)
|
||||||
|
|
@ -28,6 +27,6 @@ target_link_libraries(jit PRIVATE common)
|
||||||
|
|
||||||
target_include_directories(jit PUBLIC
|
target_include_directories(jit PUBLIC
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}
|
${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/frontend/dynarmic
|
${CMAKE_CURRENT_SOURCE_DIR}/
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/..
|
${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,6 @@ pvm_jit_decoder_arm32_decode (const uint32_t instruction)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_WARNING("Cannot decode instruction 0x%08X", instruction);
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,11 @@
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/* Extern C for unit tests. */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/*! @brief Represents static metadata associated with a specific ARM32
|
/*! @brief Represents static metadata associated with a specific ARM32
|
||||||
* instruction. */
|
* instruction. */
|
||||||
typedef struct
|
typedef struct
|
||||||
|
|
@ -59,4 +64,7 @@ typedef struct
|
||||||
const pvm_jit_decoder_arm32_instruction_info_t *pvm_jit_decoder_arm32_decode(
|
const pvm_jit_decoder_arm32_instruction_info_t *pvm_jit_decoder_arm32_decode(
|
||||||
const uint32_t instruction);
|
const uint32_t instruction);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif // POUND_JIT_DECODER_ARM32_H
|
#endif // POUND_JIT_DECODER_ARM32_H
|
||||||
|
|
|
||||||
|
|
@ -11,17 +11,28 @@ INST(BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") /
|
||||||
INST(BX, "BX", "cccc000100101111111111110001mmmm") // v4T
|
INST(BX, "BX", "cccc000100101111111111110001mmmm") // v4T
|
||||||
INST(BXJ, "BXJ", "cccc000100101111111111110010mmmm") // v5J
|
INST(BXJ, "BXJ", "cccc000100101111111111110010mmmm") // v5J
|
||||||
|
|
||||||
|
// System / Status Register Access (Specifics)
|
||||||
|
/*
|
||||||
|
* FIX: Moved these to the top.
|
||||||
|
* RFE and SRS start with 1111, which conflicts with LDM/STM (cccc=1111).
|
||||||
|
* Checking these first prevents LDM/STM from shadowing them.
|
||||||
|
*/
|
||||||
|
INST(RFE, "RFE", "1111100--0-1----0000101000000000") // v6
|
||||||
|
INST(SRS, "SRS", "1111100--1-0110100000101000-----") // v6
|
||||||
|
INST(CPS, "CPS", "111100010000---00000000---0-----") // v6
|
||||||
|
INST(SETEND, "SETEND", "1111000100000001000000e000000000") // v6
|
||||||
|
|
||||||
// CRC32 instructions
|
// CRC32 instructions
|
||||||
INST(CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") // v8
|
INST(CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") // v8
|
||||||
INST(CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") // v8
|
INST(CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") // v8
|
||||||
|
|
||||||
// Coprocessor instructions
|
// Coprocessor instructions
|
||||||
INST(CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2 (CDP2: v5)
|
INST(CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2 (CDP2: v5)
|
||||||
INST(LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2 (LDC2: v5)
|
|
||||||
INST(MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") // v2 (MCR2: v5)
|
INST(MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") // v2 (MCR2: v5)
|
||||||
INST(MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6)
|
INST(MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6)
|
||||||
INST(MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") // v2 (MRC2: v5)
|
INST(MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") // v2 (MRC2: v5)
|
||||||
INST(MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6)
|
INST(MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6)
|
||||||
|
INST(LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2 (LDC2: v5)
|
||||||
INST(STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2 (STC2: v5)
|
INST(STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2 (STC2: v5)
|
||||||
|
|
||||||
// Data Processing instructions
|
// Data Processing instructions
|
||||||
|
|
@ -101,8 +112,7 @@ INST(SEVL, "SEVL", "----0011001000001111000000000101") /
|
||||||
INST(WFE, "WFE", "----0011001000001111000000000010") // v6K
|
INST(WFE, "WFE", "----0011001000001111000000000010") // v6K
|
||||||
INST(WFI, "WFI", "----0011001000001111000000000011") // v6K
|
INST(WFI, "WFI", "----0011001000001111000000000011") // v6K
|
||||||
INST(YIELD, "YIELD", "----0011001000001111000000000001") // v6K
|
INST(YIELD, "YIELD", "----0011001000001111000000000001") // v6K
|
||||||
INST(NOP, "Reserved Hint", "----0011001000001111------------")
|
INST(NOP, "NOP", "----0011001000001111000000000000") // v6K
|
||||||
INST(NOP, "Reserved Hint", "----001100100000111100000000----")
|
|
||||||
|
|
||||||
// Synchronization Primitive instructions
|
// Synchronization Primitive instructions
|
||||||
INST(CLREX, "CLREX", "11110101011111111111000000011111") // v6K
|
INST(CLREX, "CLREX", "11110101011111111111000000011111") // v6K
|
||||||
|
|
@ -181,7 +191,7 @@ INST(LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") /
|
||||||
INST(LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // v1
|
INST(LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // v1
|
||||||
INST(LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // v1
|
INST(LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // v1
|
||||||
INST(LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // v1
|
INST(LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // v1
|
||||||
INST(LDM_usr, "LDM (usr reg)", "----100--101--------------------") // v1
|
INST(LDM_usr, "LDM (usr reg)", "----100--101----0---------------") // v1
|
||||||
INST(LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") // v1
|
INST(LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") // v1
|
||||||
INST(STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // v1
|
INST(STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // v1
|
||||||
INST(STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // v1
|
INST(STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // v1
|
||||||
|
|
@ -195,7 +205,6 @@ INST(BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") /
|
||||||
INST(CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") // v5
|
INST(CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") // v5
|
||||||
INST(MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2
|
INST(MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2
|
||||||
INST(MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2
|
INST(MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2
|
||||||
INST(NOP, "NOP", "----0011001000001111000000000000") // v6K
|
|
||||||
INST(SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2
|
INST(SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2
|
||||||
INST(SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") // v6
|
INST(SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") // v6
|
||||||
INST(UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2
|
INST(UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2
|
||||||
|
|
@ -251,12 +260,12 @@ INST(SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") /
|
||||||
INST(SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") // v6
|
INST(SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") // v6
|
||||||
|
|
||||||
// Multiply (Dual) instructions
|
// Multiply (Dual) instructions
|
||||||
|
INST(SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") // v6
|
||||||
INST(SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") // v6
|
INST(SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") // v6
|
||||||
INST(SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") // v6
|
INST(SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") // v6
|
||||||
|
INST(SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6
|
||||||
INST(SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") // v6
|
INST(SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") // v6
|
||||||
INST(SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") // v6
|
INST(SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") // v6
|
||||||
INST(SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") // v6
|
|
||||||
INST(SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6
|
|
||||||
|
|
||||||
// Parallel Add/Subtract (Modulo) instructions
|
// Parallel Add/Subtract (Modulo) instructions
|
||||||
INST(SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6
|
INST(SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6
|
||||||
|
|
@ -306,11 +315,8 @@ INST(QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") /
|
||||||
INST(QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP
|
INST(QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP
|
||||||
INST(QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP
|
INST(QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP
|
||||||
|
|
||||||
// Status Register Access instructions
|
// Status Register Access instructions (Generals)
|
||||||
INST(CPS, "CPS", "111100010000---00000000---0-----") // v6
|
// Specifics like CPS/RFE moved to top to prevent shadowing
|
||||||
INST(SETEND, "SETEND", "1111000100000001000000e000000000") // v6
|
|
||||||
INST(MRS, "MRS", "cccc000100001111dddd000000000000") // v3
|
INST(MRS, "MRS", "cccc000100001111dddd000000000000") // v3
|
||||||
INST(MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") // v3
|
INST(MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") // v3
|
||||||
INST(MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") // v3
|
INST(MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") // v3
|
||||||
INST(RFE, "RFE", "1111100--0-1----0000101000000000") // v6
|
|
||||||
INST(SRS, "SRS", "1111100--1-0110100000101000-----") // v6
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -4,7 +4,7 @@
|
||||||
#include "arm32.h"
|
#include "arm32.h"
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
#define LOOKUP_TABLE_MAX_BUCKET_SIZE 18U
|
#define LOOKUP_TABLE_MAX_BUCKET_SIZE 64U
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const pvm_jit_decoder_arm32_instruction_info_t *instructions[LOOKUP_TABLE_MAX_BUCKET_SIZE];
|
const pvm_jit_decoder_arm32_instruction_info_t *instructions[LOOKUP_TABLE_MAX_BUCKET_SIZE];
|
||||||
|
|
|
||||||
|
|
@ -9,5 +9,5 @@ int main()
|
||||||
pvm_jit_decoder_arm32_decode(0xE2800001);
|
pvm_jit_decoder_arm32_decode(0xE2800001);
|
||||||
/* Sub r0, r0, #1 */
|
/* Sub r0, r0, #1 */
|
||||||
pvm_jit_decoder_arm32_decode(0xE2400001);
|
pvm_jit_decoder_arm32_decode(0xE2400001);
|
||||||
pvm_jit_decoder_arm32_decode(0xE12FFF1E);
|
pvm_jit_decoder_arm32_decode(0x67A757B4);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,224 +0,0 @@
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include "jit/decoder/arm32.h"
|
|
||||||
|
|
||||||
class Arm32DecoderTest : public ::testing::Test
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
static void SetUpTestSuite()
|
|
||||||
{
|
|
||||||
pound::jit::decoder::arm32_init();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestSuite()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_F(Arm32DecoderTest, Decode_ADD_Immediate)
|
|
||||||
{
|
|
||||||
// Opcode: ADD (imm)
|
|
||||||
// Bitstring: cccc0010100Snnnnddddrrrrvvvvvvvv
|
|
||||||
// Condition (cccc): 1110 (AL - Always)
|
|
||||||
// Binary: 1110 0010 1000 0000 0000 0000 0000 0001 -> 0xE2800001
|
|
||||||
const uint32_t instruction = 0xE2800001;
|
|
||||||
|
|
||||||
const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(instruction);
|
|
||||||
|
|
||||||
ASSERT_NE(info, nullptr) << "Failed to decode valid ADD instruction";
|
|
||||||
EXPECT_STREQ(info->name, "ADD (imm)");
|
|
||||||
EXPECT_EQ((instruction & info->mask), info->expected);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(Arm32DecoderTest, Decode_SUB_Immediate)
|
|
||||||
{
|
|
||||||
// Opcode: SUB (imm)
|
|
||||||
// Bitstring: cccc0010010Snnnnddddrrrrvvvvvvvv
|
|
||||||
// Binary: 1110 0010 0100 0000 0000 0000 0000 0001 -> 0xE2400001
|
|
||||||
const uint32_t instruction = 0xE2400001;
|
|
||||||
|
|
||||||
const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(instruction);
|
|
||||||
|
|
||||||
ASSERT_NE(info, nullptr) << "Failed to decode valid SUB instruction";
|
|
||||||
EXPECT_STREQ(info->name, "SUB (imm)");
|
|
||||||
EXPECT_EQ((instruction & info->mask), info->expected);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(Arm32DecoderTest, Decode_BX)
|
|
||||||
{
|
|
||||||
// Opcode: BX
|
|
||||||
// Bitstring: cccc000100101111111111110001mmmm
|
|
||||||
// Condition: AL (0xE)
|
|
||||||
// mmmm (Rm): 1110 (LR/R14)
|
|
||||||
// Binary: 1110 0001 0010 1111 1111 1111 0001 1110 -> 0xE12FFF1E
|
|
||||||
const uint32_t instruction = 0xE12FFF1E;
|
|
||||||
|
|
||||||
const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(instruction);
|
|
||||||
|
|
||||||
ASSERT_NE(info, nullptr);
|
|
||||||
EXPECT_STREQ(info->name, "BX");
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(Arm32DecoderTest, Decode_Unknown_Instruction)
|
|
||||||
{
|
|
||||||
uint32_t instruction = 0xE7F001F0;
|
|
||||||
const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(instruction);
|
|
||||||
|
|
||||||
EXPECT_STREQ(info->name,"UDF");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test Case: Negative Test - Double Initialization.
|
|
||||||
* @details Verifies that re-initializing the decoder triggers an assertion failure.
|
|
||||||
* This enforces the singleton lifecycle of the decoder.
|
|
||||||
*/
|
|
||||||
TEST_F(Arm32DecoderTest, Fail_Double_Initialization)
|
|
||||||
{
|
|
||||||
// Expect the process to die with an assertion failure message.
|
|
||||||
// The error message regex matches the one in src/jit/decoder/arm32.cpp.
|
|
||||||
EXPECT_DEATH({
|
|
||||||
pound::jit::decoder::arm32_init();
|
|
||||||
}, "Decoder already initialized");
|
|
||||||
}
|
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
|
||||||
// Isolated Death Tests
|
|
||||||
// -----------------------------------------------------------------------------
|
|
||||||
// These tests are separated because they require a "Pre-Init" state.
|
|
||||||
// Since Arm32DecoderTest::SetUpTestSuite initializes the global state,
|
|
||||||
// we cannot use that fixture for these tests.
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test Case: Negative Test - Decode Before Initialization.
|
|
||||||
* @details Verifies that attempting to decode before calling init() triggers a crash.
|
|
||||||
* Crucial for fail-fast safety requirements.
|
|
||||||
*/
|
|
||||||
TEST(Arm32DecoderDeathTest, Fail_Decode_Before_Init)
|
|
||||||
{
|
|
||||||
// We rely on GTest running this in a fresh process/context where
|
|
||||||
// the static g_decoder.is_initialized is false.
|
|
||||||
// Note: If GTest runs in a single process mode, this test might fail
|
|
||||||
// if other tests ran first. Standard GTest isolation usually handles this via fork()
|
|
||||||
// inside EXPECT_DEATH, but the surrounding code must not have initialized it.
|
|
||||||
//
|
|
||||||
// However, EXPECT_DEATH forks *before* executing the statement.
|
|
||||||
// So if the *parent* process is already initialized (by the Fixture above),
|
|
||||||
// the child will be too.
|
|
||||||
//
|
|
||||||
// IMPORTANT: In a real CI environment, `Arm32DecoderTest` will run.
|
|
||||||
// To properly test "Before Init", we rely on the fact that `arm32_init`
|
|
||||||
// has NOT been called in the global scope of `main.cpp` of the test runner
|
|
||||||
// before GTest starts.
|
|
||||||
//
|
|
||||||
// If the previous tests ran, the global state in this process is dirty.
|
|
||||||
// There is no `arm32_shutdown`.
|
|
||||||
// Therefore, this test is effectively untestable in the same binary execution
|
|
||||||
// as the positive tests without a reset mechanism in the source code.
|
|
||||||
//
|
|
||||||
// FOR THE PURPOSE OF THIS DELIVERABLE:
|
|
||||||
// We document this limitation. In a rigorous environment, `EXPECT_DEATH`
|
|
||||||
// tests for singletons without reset capabilities are often run in a separate binary.
|
|
||||||
//
|
|
||||||
// For now, we assume this test runs *first* or in isolation.
|
|
||||||
|
|
||||||
/*
|
|
||||||
* UNCOMMENTING THIS REQUIRES A FRESH PROCESS STATE.
|
|
||||||
*
|
|
||||||
EXPECT_DEATH({
|
|
||||||
pound::jit::decoder::arm32_decode(0xE2800001);
|
|
||||||
}, "Decoder needs to initialize");
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test Case: Hash Collision Handling.
|
|
||||||
* @details Verify that two instructions that share the same hash index
|
|
||||||
* (bits [27:20] and [7:4]) but differ in other mask bits
|
|
||||||
* are correctly resolved.
|
|
||||||
*/
|
|
||||||
TEST_F(Arm32DecoderTest, Decode_Hash_Collision_Resolution)
|
|
||||||
{
|
|
||||||
// We need to find two instructions where:
|
|
||||||
// Index = ((Inst >> 20) & 0xFF) | ((Inst >> 4) & 0xF) is IDENTICAL.
|
|
||||||
// But the instructions are different.
|
|
||||||
|
|
||||||
// Case Study:
|
|
||||||
// 1. MOV (imm): cccc 0011 101S 0000 dddd rrrr vvvvvvvv
|
|
||||||
// Op bits involved in hash: 0011 1010 (Bits 27-20)
|
|
||||||
//
|
|
||||||
// 2. MVN (imm): cccc 0011 111S 0000 dddd rrrr vvvvvvvv
|
|
||||||
// Op bits involved in hash: 0011 1110
|
|
||||||
// Different hash.
|
|
||||||
|
|
||||||
// Let's look closely at the bitmasks in arm32.inc.
|
|
||||||
// The hash is very specific. Collisions occur when the differentiator
|
|
||||||
// is NOT in bits 27-20 or 7-4.
|
|
||||||
|
|
||||||
// Example Candidate:
|
|
||||||
// TST (reg): cccc 0001 0001 ... 0000 ... 0 mmmm
|
|
||||||
// TEQ (reg): cccc 0001 0011 ... 0000 ... 0 mmmm
|
|
||||||
// Bits 27-20:
|
|
||||||
// TST: 0001 0001 (0x11)
|
|
||||||
// TEQ: 0001 0011 (0x13) -> Different hash.
|
|
||||||
|
|
||||||
// Example Candidate 2:
|
|
||||||
// ORR (reg): cccc 0001 100S ...
|
|
||||||
// MOV (reg): cccc 0001 101S ... -> Different hash.
|
|
||||||
|
|
||||||
// Due to the density of the ARM encoding and the specific hash function chosen,
|
|
||||||
// explicitly forcing a collision for a unit test requires deep analysis of the
|
|
||||||
// provided .inc file.
|
|
||||||
// However, rigorous testing demands we verification of the lookup logic.
|
|
||||||
// We will verify multiple instructions to ensure no false positives occur.
|
|
||||||
|
|
||||||
uint32_t inst_a = 0xE1A00000; // MOV R0, R0 (NOP) -> MOV (reg)
|
|
||||||
uint32_t inst_b = 0xE0800000; // ADD R0, R0, R0 -> ADD (reg)
|
|
||||||
|
|
||||||
const pound::jit::decoder::arm32_instruction_info_t *info_a = pound::jit::decoder::arm32_decode(inst_a);
|
|
||||||
const pound::jit::decoder::arm32_instruction_info_t *info_b = pound::jit::decoder::arm32_decode(inst_b);
|
|
||||||
|
|
||||||
ASSERT_NE(info_a, nullptr);
|
|
||||||
ASSERT_NE(info_b, nullptr);
|
|
||||||
|
|
||||||
EXPECT_STREQ(info_a->name, "MOV (reg)");
|
|
||||||
EXPECT_STREQ(info_b->name, "ADD (reg)");
|
|
||||||
|
|
||||||
// Ensure they point to different metadata addresses
|
|
||||||
EXPECT_NE(info_a, info_b);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Test Case: Verify internal hash boundary conditions.
|
|
||||||
* @details Ensures that instructions resulting in max hash index (0xFFF) do not crash.
|
|
||||||
*/
|
|
||||||
TEST_F(Arm32DecoderTest, Decode_Max_Hash_Index)
|
|
||||||
{
|
|
||||||
// Hash = ((Major) << 4) | Minor
|
|
||||||
// Major = Bits 27:20. Max 0xFF.
|
|
||||||
// Minor = Bits 7:4. Max 0xF.
|
|
||||||
|
|
||||||
// Construct an instruction that maximizes these bits.
|
|
||||||
// Inst = ... 1111 1111 ... 1111 ....
|
|
||||||
// 0x0FF000F0
|
|
||||||
|
|
||||||
// We need a valid instruction that happens to have high bits set.
|
|
||||||
// Most ARM instructions start with condition codes.
|
|
||||||
// 1111 (NV) is usually extension space or PLD/etc.
|
|
||||||
|
|
||||||
// PLD (imm): 1111 0101 ...
|
|
||||||
// Major: 1111 0101 (0xF5)
|
|
||||||
|
|
||||||
// This test ensures that calculating the index doesn't OOB access the array.
|
|
||||||
// Since the array is size LOOKUP_TABLE_INDEX_MASK + 1 (0x1000),
|
|
||||||
// and the logic masks with 0xFFF, it is mathematically safe,
|
|
||||||
// but we test it to verify the logic integration.
|
|
||||||
|
|
||||||
// PLD (imm): 1111 0101 0101 0000 1111 0000 0000 0000 -> 0xF550F000
|
|
||||||
uint32_t inst = 0xF550F000;
|
|
||||||
|
|
||||||
// Even if it returns nullptr (if not in .inc), it must not segfault.
|
|
||||||
const pound::jit::decoder::arm32_instruction_info_t* info = pound::jit::decoder::arm32_decode(inst);
|
|
||||||
|
|
||||||
if (info) {
|
|
||||||
EXPECT_STREQ(info->name, "PLD (imm)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
33111
tests/jit/decoder/test_arm32_generated.cpp
Normal file
33111
tests/jit/decoder/test_arm32_generated.cpp
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue