jit/decoder: Overhaul arm32 decoder

Rewrites the Arm32 instruction decoder to align with high integrity
software standards such as MISRA C and BARR-C.

Key Architectural  Changes:

- Added compile time validation.
- Removed dependency on memory allocator. The decoder now uses
   statically allocated global storage.
- Implement a hash-based lookup table to reduce decoding complexity to
   O(1).
- Removed decoder_t in favour of a singleton pattern.
- Add documentation stating thread safety, preconditions, and
   postconditions for all public functions.

Signed-off-by: Ronald Caesar <github43132@proton.me>
This commit is contained in:
Ronald Caesar 2025-11-28 19:03:04 -04:00
parent 04b5af2517
commit 27710ca8c9
No known key found for this signature in database
GPG key ID: 04307C401999C596
3 changed files with 240 additions and 129 deletions

View file

@ -6,137 +6,212 @@
#include "common/logging.h"
namespace pound::jit::decoder {
#define INSTRUCTION_ARRAY_CAPACITY 261
/*! @brief Maximum number of instructions allowed in a single hash bucket. */
#define LOOKUP_TABLE_MAX_BUCKET_SIZE 8
/*! @brief Size of the lookup table (12-bit index). */
#define LOOKUP_TABLE_INDEX_MASK 0xFFF
/*! @brief Expected length of the bitstring representation in the .inc file. */
#define INSTRUCTION_BITSTRING_LENGTH 32
/*!
* @brief A bucket within the decoding hash table.
* @details Stores a list of instructions that collide on the same hash index.
*/
typedef struct
{
/*! @brief Array of pointers to instruction definitions. */
const arm32_instruction_info_t *instructions[LOOKUP_TABLE_MAX_BUCKET_SIZE];
/*! @brief Current number of instructions in this bucket. */
size_t count;
} decode_bucket_t;
/*! @brief The internal state of the decoder. */
typedef struct
{
/*!
* @brief The main lookup table.
* @details Index constructed from bits [27:20] and [7:4] of the
* instruction.
*/
decode_bucket_t lookup_table[LOOKUP_TABLE_INDEX_MASK + 1];
/*! @brief Initialization guard flag. */
bool is_initialized;
} decoder_t;
/*! @brief Global decoder instance. */
static decoder_t g_decoder = {};
/*
* ============================================================================
* Foward Declarations
* Compile-Time Functions
* ============================================================================
*/
static void arm32_add_instruction(arm32_decoder_t *p_decoder,
const char *p_name,
const char *p_bitstring);
static void arm32_parse_bitstring(const char *p_bitstring,
uint32_t *p_mask,
uint32_t *p_expected);
/*! @brief Prototype for a non-constexpr function to force build failure. */
void BUILD_ERROR_ARM32_BITSTRING_MUST_BE_32_CHARS(void);
/*!
* @brief Parses a binary string literal into a bitmask at compile time.
*
* @param[in] string The null-terminated string literal.
* @return The calculated uint32_t mask.
*/
consteval uint32_t
parse_mask (const char *string)
{
size_t length = 0;
for (; string[length] != '\0'; ++length)
{
}
if (length != INSTRUCTION_BITSTRING_LENGTH)
{
BUILD_ERROR_ARM32_BITSTRING_MUST_BE_32_CHARS();
}
uint32_t mask = 0;
for (int i = 0; i < 32; ++i)
{
const uint32_t bit = 1U << (31 - i);
if ('0' == string[i] || string[i] == '1')
{
mask |= bit;
}
}
return mask;
}
/*!
* @brief Parses a binary string literal into an expected value at compile time.
*
* @param[in] string The null-terminated string literal.
* @return The calculated uint32_t expected value.
*/
consteval uint32_t
parse_expected (const char *string)
{
uint32_t expected = 0;
for (int i = 0; i < 32; ++i)
{
const uint32_t bit = 1U << (31 - i);
if (string[i] == '1')
{
expected |= bit;
}
}
return expected;
}
/*! @brief List of all supported ARM32 instructions. */
static const arm32_instruction_info_t g_instructions[] = {
#define INST(fn, name, bitstring) \
{ name, parse_mask(bitstring), parse_expected(bitstring) },
#include "arm32.inc"
#undef INST
};
/*! @brief The total number of defined instructions. */
#define INSTRUCTION_ARRAY_CAPACITY \
(sizeof(g_instructions) / sizeof(g_instructions[0]))
/*
* ============================================================================
* Public Functions
* ============================================================================
*/
void
arm32_init (pound::host::memory::arena_t allocator, arm32_decoder_t *p_decoder)
arm32_init (void)
{
PVM_ASSERT(nullptr != p_decoder);
PVM_ASSERT(nullptr != allocator.data);
PVM_ASSERT_MSG(false == g_decoder.is_initialized,
"Decoder already initialized.");
(void)memset(p_decoder, 0, sizeof(arm32_decoder_t));
p_decoder->allocator = allocator;
(void)memset(g_decoder.lookup_table, 0, sizeof(g_decoder.lookup_table));
/* Setup Instructions array.*/
size_t instructions_array_size
= INSTRUCTION_ARRAY_CAPACITY * sizeof(arm32_instruction_info_t);
PVM_ASSERT(instructions_array_size <= p_decoder->allocator.capacity);
LOG_TRACE("Allocated %d bytes to instructions array",
instructions_array_size);
void *p_memory = pound::host::memory::arena_allocate(
&p_decoder->allocator, instructions_array_size);
PVM_ASSERT(nullptr != p_memory);
p_decoder->p_instructions = (arm32_instruction_info_t *)p_memory;
p_decoder->instruction_capacity = INSTRUCTION_ARRAY_CAPACITY;
/* Add all Arm32 instructions */
#define INST(fn, name, bitstring) \
arm32_add_instruction(p_decoder, name, bitstring);
#include "./arm32.inc"
#undef INST
}
arm32_instruction_info_t *
arm32_decode (arm32_decoder_t *p_decoder, uint32_t instruction)
{
for (size_t i = 0; i < p_decoder->instruction_count; ++i)
// Populate the hash table.
for (uint32_t i = 0; i <= LOOKUP_TABLE_INDEX_MASK; ++i)
{
arm32_instruction_info_t *p_info = &p_decoder->p_instructions[i];
if ((instruction & p_info->mask) == p_info->expected)
decode_bucket_t *bucket = &g_decoder.lookup_table[i];
// Reconstruct the instruction bits that correspond to this hash index.
// Bits [27:20] and [7:4].
const uint32_t synthetic_instruction
= ((i & 0xFF0) << 16) | ((i & 0xF) << 4);
for (size_t ii = 0; ii < INSTRUCTION_ARRAY_CAPACITY; ++ii)
{
LOG_TRACE("Instruction found for 0x%08X: %s",
instruction,
p_info->p_name);
return p_info;
const arm32_instruction_info_t *info = &g_instructions[ii];
/* Mask corresponding to the hash bits: 0x0FF000F0 */
const uint32_t index_bits_mask = 0x0FF000F0;
const uint32_t relevant_mask = index_bits_mask | info->mask;
if ((synthetic_instruction & relevant_mask)
== (info->expected & relevant_mask))
{
LOG_TRACE("Mapping instruction '%s' to LUT Index 0x%03X",
info->name,
i);
if (bucket->count >= LOOKUP_TABLE_MAX_BUCKET_SIZE)
{
PVM_ASSERT_MSG(
false,
"ARM32 LUT Collision Overflow at index 0x%03X. "
"Increase MAX_LUT_BUCKET_SIZE.",
i);
}
if (bucket->count >= (LOOKUP_TABLE_MAX_BUCKET_SIZE / 2))
{
LOG_WARNING(
"High collision density at Index 0x%03X (Count: %zu).",
i,
bucket->count + 1);
}
bucket->instructions[bucket->count] = info;
++bucket->count;
}
}
}
PVM_ASSERT_MSG(false, "No instruction found for 0x%08X", instruction);
g_decoder.is_initialized = true;
LOG_INFO("ARM32 Decoder initialized with %zu instructions",
INSTRUCTION_ARRAY_CAPACITY);
}
/*
* ============================================================================
* Private Functions
* ============================================================================
*/
static void
arm32_add_instruction (arm32_decoder_t *p_decoder,
const char *p_name,
const char *p_bitstring)
const arm32_instruction_info_t *
arm32_decode (const uint32_t instruction)
{
PVM_ASSERT(nullptr != p_decoder);
PVM_ASSERT(nullptr != p_decoder->allocator.data);
PVM_ASSERT(nullptr != p_name);
PVM_ASSERT(nullptr != p_bitstring);
PVM_ASSERT(p_decoder->instruction_count < p_decoder->instruction_capacity);
PVM_ASSERT_MSG(true == g_decoder.is_initialized,
"Decoder needs to initialize.");
uint32_t mask = 0;
uint32_t expected = 0;
arm32_parse_bitstring(p_bitstring, &mask, &expected);
/* Extract hash key: Bits [27:20] and [7:4] */
const uint32_t major = (instruction >> 20) & 0xFF;
const uint32_t minor = (instruction >> 4) & 0xF;
arm32_instruction_info_t *p_info
= &p_decoder->p_instructions[p_decoder->instruction_count];
PVM_ASSERT(nullptr != p_info);
p_info->p_name = p_name;
p_info->mask = mask;
p_info->expected = expected;
const uint16_t index = (uint16_t)((major << 4) | minor);
++p_decoder->instruction_count;
const decode_bucket_t *bucket = &g_decoder.lookup_table[index];
LOG_TRACE("Instruction Registered: %s", p_info->p_name);
LOG_TRACE("Mask: 0x%08X", p_info->mask);
LOG_TRACE("Expected: 0x%08X", p_info->expected);
}
static void
arm32_parse_bitstring (const char *p_bitstring,
uint32_t *p_mask,
uint32_t *p_expected)
{
PVM_ASSERT(nullptr != p_bitstring);
PVM_ASSERT(nullptr != p_mask);
PVM_ASSERT(nullptr != p_expected);
PVM_ASSERT(INSTRUCTION_BITSTRING_LENGTH == strlen(p_bitstring));
*p_mask = 0;
*p_expected = 0;
uint8_t instruction_size_bits = 32;
for (unsigned int i = 0;
(i < instruction_size_bits) && (p_bitstring[i] != '\0');
++i)
for (size_t i = 0; i < bucket->count; ++i)
{
uint32_t bit_position = 31 - i;
switch (p_bitstring[i])
const arm32_instruction_info_t *info = bucket->instructions[i];
if ((instruction & info->mask) == info->expected)
{
case '0':
*p_mask |= (1U << bit_position);
break;
case '1':
*p_mask |= (1U << bit_position);
*p_expected |= (1U << bit_position);
default:
break;
return info;
}
}
return nullptr;
}
} // namespace pound::jit::p_decoder
} // namespace pound::jit::decoder

View file

@ -1,31 +1,74 @@
/**
* @file arm32.h
* @brief A32 Instruction Decoder Interface.
*
* @details
* This module provides the interface for decoding 32-bit ARM instructions
* into internal metadata structures.
*
* @note
* While the decoding lookup is thread-safe after initialization, the
* initialization phase itself is NOT thread-safe.
*/
#ifndef POUND_JIT_DECODER_ARM32_H
#define POUND_JIT_DECODER_ARM32_H
#include <stddef.h>
#include <stdint.h>
#include "host/memory/arena.h"
namespace pound::jit::decoder {
typedef struct arm32_decoder arm32_decoder_t;
/*! @brief Represents static metadata associated with a specific ARM32 instruction. */
typedef struct
{
const char *p_name;
/*! @brief The instruction mnemonic (e.g., "ADD", "LDR"). */
const char *name;
/*!
* @brief The bitmask indicating which bits in the instruction word are significant.
* @details 1 = significant bit, 0 = variable field (register, immediate, etc.).
*/
uint32_t mask;
/*!
* @brief The expected value of the instruction after applying the mask.
* @details (instruction & mask) == expected.
*/
uint32_t expected;
} arm32_instruction_info_t;
struct arm32_decoder
{
pound::host::memory::arena_t allocator;
arm32_instruction_info_t *p_instructions;
size_t instruction_count;
size_t instruction_capacity;
};
/*!
* @brief Initializes the ARM32 decoder lookup tables.
*
* @details
* Populates the internal hash table used for O(1) instruction decoding.
*
* @pre This function must be called during the application startup phase.
* @post The internal global decoder state is initialized and ready for use.
*
* @warning *!Thread Safety*!: Unsafe. This function modifies global state without locking.
*/
void arm32_init(void);
/*!
* @brief Decodes a raw 32-bit ARM instruction.
*
* @details
* Performs a hash lookup on the provided instruction word to find a matching
* definition.
*
* @param[in] instruction The raw 32-bit ARM machine code to decode.
*
* @return A pointer to the instruction metadata if a match is found.
* @return `nullptr` if the instruction is undefined or invalid.
*
* @pre `arm32_init()` must have been called successfully.
* @post The returned pointer (if not null) points to static read-only memory.
*
* @note *!Thread Safety*!: Safe. This function is read-only re-entrant.
*/
const arm32_instruction_info_t *arm32_decode(const uint32_t instruction);
} // namespace pound::jit::decoder
void arm32_init(pound::host::memory::arena_t allocator,
arm32_decoder_t *p_decoder);
arm32_instruction_info_t *arm32_decode(arm32_decoder_t *p_decoder,
uint32_t instruction);
}
#endif // POUND_JIT_DECODER_ARM32_H

View file

@ -3,23 +3,16 @@
#define LOG_MODULE "main"
#include "common/logging.h"
#include "common/passert.h"
#include "host/memory/arena.h"
#include "jit/decoder/arm32.h"
#include "jit/ir/opcode.h"
#include "jit/a32_types.h"
int main()
{
pound::host::memory::arena_t arena = pound::host::memory::arena_init(8192);
pound::jit::decoder::arm32_decoder_t decoder = {};
pound::jit::decoder::arm32_init(arena, &decoder);
pound::jit::decoder::arm32_init();
/* Add r0, r0, #1 */
pound::jit::decoder::arm32_decode(&decoder, 0xE2800001);
pound::jit::decoder::arm32_decode(0xE2800001);
/* Sub r0, r0, #1 */
pound::jit::decoder::arm32_decode(&decoder, 0xE2400001);
pound::jit::ir::opcode_init();
pound::host::memory::arena_free(&arena);
pound::jit::decoder::arm32_decode(0xE2400001);
//pound::jit::ir::opcode_init();
#if 0
gui::window_t window = {.data = nullptr, .gl_context = nullptr};