diff --git a/CMakeLists.txt b/CMakeLists.txt index d72f2e8..0fc63a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Debug) endif() + # Optimizations set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON) if (WIN32) @@ -35,6 +36,14 @@ add_executable(Pound ${Core} ) +target_compile_options(Pound PRIVATE -Wall -Wpedantic +-Wshadow +-Wpointer-arith +-Wcast-qual +-Wcast-align +-Wconversion +) + target_precompile_headers(Pound PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/core/Base/Types.h) # Link libraries diff --git a/core/arm64/guest.h b/core/arm64/guest.h index 9bf333a..ee06b61 100644 --- a/core/arm64/guest.h +++ b/core/arm64/guest.h @@ -192,4 +192,4 @@ static inline void guest_mem_writeq(guest_memory_t* memory, uint64_t gpa, uint64 uint64_t* hva = (uint64_t*)gpa_to_hva(memory, gpa); *hva = val; } -} // namespace pound::aarch64::memory +} // namespace pound::arm64::memory diff --git a/core/arm64/isa.cpp b/core/arm64/isa.cpp index d24e700..e6c0c67 100644 --- a/core/arm64/isa.cpp +++ b/core/arm64/isa.cpp @@ -42,7 +42,6 @@ void take_synchronous_exception(vcpu_state_t* vcpu, uint8_t exception_class, uin * to 0b0101 for EL1h (using SP_EL1). (page 913 in manual) */ const uint32_t PSTATE_EL_MASK = 0b1111; vcpu->pstate &= ~PSTATE_EL_MASK; - const uint32_t PSTATE_EL1H = 0b0101; vcpu->pstate |= PSTATE_EL1H; /* TODO(GloriousTacoo:arm): DO NOT IMPLEMENT UNTIL THE INSTRUCTION @@ -163,9 +162,5 @@ void cpuTest() guest_ram.size = guest_memory_arena.capacity; (void)test_guest_ram_access(&guest_ram); - vcpu_states[0].sctlr_el1 = 3; - uint64_t out = 0; - uint64_t gva = 2636; - assert(0 == pound::arm64::memory::mmu_gva_to_gpa(&vcpu_states[0], gva, &out)); } } // namespace pound::armv64 diff --git a/core/arm64/isa.h b/core/arm64/isa.h index 745f319..41599c3 100644 --- a/core/arm64/isa.h +++ b/core/arm64/isa.h @@ -25,6 +25,12 @@ namespace pound::arm64 /* Data Abort exception from a lower Exception level. */ #define EC_DATA_ABORT_LOWER_EL 0b100100 +/* Set the PSTATE exception level. (page 913 in manual) */ +#define PSTATE_EL0 0b0000 +#define PSTATE_EL1T 0b0100 +#define PSTATE_EL1H 0b0101 + + /* * vcpu_state_t - Holds the architectural and selected system-register state for an emulated vCPU. * @v: 128-bit SIMD/FP vector registers V0–V31. @@ -40,9 +46,12 @@ namespace pound::arm64 * @elr_el1: Exception Link Register. * @esr_el1: Exception Syndrome Register. * @far_el1: Fault Address Register. - * @vbar_el1: Vector Base Address Register. * @sctlr_el1: System Control Register. * @spsr_el1: Saved Program Status Register. + * @tcr_el1: Translation Control Register. + * @ttbr0_el1: Translation Table Base Register 0. + * @ttbr1_el1: Translation Table Base Register 1. + * @vbar_el1: Vector Base Address Register. * @ctr_el0: Cache-Type. * @cntv_ctl_el0: Virtual Timer Control. * @dczid_el0: Data Cache Zero ID. @@ -83,7 +92,7 @@ typedef struct alignas(CACHE_LINE_SIZE) /* The memory address that caused a Data Abort exception. */ uint64_t far_el1; - /* SCTLR_EL1[0] bit enables the MMU. */ + /* Bit [0] enables the MMU. */ uint64_t sctlr_el1; /* @@ -93,6 +102,29 @@ typedef struct alignas(CACHE_LINE_SIZE) */ uint64_t spsr_el1; + /* Bits [5:0], T0SZ, specifies the size of the bottom half of the + * virtual address space (the ones controlled by TTBR0). + * + * Bits [21:16], T1SZ, does the same for the top half of the virtual + * address space (controlled by TTBR1).
*/ + uint64_t tcr_el1; + + /* + * Holds the 64-bit base physical address of the initial page table + * used for translating virtual addresses in the lower half of the + * virtual address space (typically userspace). The top bit of the VA + * (bit 63) being 0 selects TTBR0 for the page table walk. + */ + uint64_t ttbr0_el1; + + /* + * Holds the 64-bit base physical address of the initial page table + * used for translating virtual addresses in the upper half of the + * virtual address space (typically kernel space). The top bit of the VA + * (bit 63) being 1 selects TTBR1 for the page table walk. + */ + uint64_t ttbr1_el1; + /* * The base address in guest memory where the Exception Vector Table * can be found. diff --git a/core/arm64/mmu.cpp b/core/arm64/mmu.cpp new file mode 100644 index 0000000..3737133 --- /dev/null +++ b/core/arm64/mmu.cpp @@ -0,0 +1,395 @@ +#include "mmu.h" +#include <cassert> +#include "isa.h" + +namespace pound::arm64::memory +{ +#define GRANULE_4KB (1ULL << 12) +#define GRANULE_16KB (1ULL << 14) +#define GRANULE_64KB (1ULL << 16) + +/* + * COUNT_TRAILING_ZEROS - Get the number of trailing zero bits in a u64 + * @x: A 64-bit value, which must be non-zero. + * + * Provides a portable wrapper around compiler-specific intrinsics for the + * "Count Trailing Zeros" operation. This is equivalent to finding the bit + * index of the least significant set bit. + * + * Note: The behavior for an input of zero is undefined for __builtin_ctzll. + * Callers must ensure the argument is non-zero. The MSVC wrapper handles + * this by returning 64, but we should not rely on this behavior. + */ +#if defined(__GNUC__) || defined(__clang__) +#define COUNT_TRAILING_ZEROS(x) (uint8_t)__builtin_ctzll(x) +#elif defined(_MSC_VER) +#include <intrin.h> +/* MSVC's intrinsic is a bit more complex to use safely */ +static inline uint8_t msvc_ctzll(unsigned long long val) +{ + unsigned long index = 0; + if (_BitScanForward64(&index, val)) + { + return (uint8_t)index; + } + return 64; +} +#define COUNT_TRAILING_ZEROS(x) msvc_ctzll(x) +#else +#error "Compiler not supported for CTZ intrinsic. Please add a fallback." +#endif + +/* Define the size of a page table entry (descriptor) */ +#define PAGE_TABLE_ENTRY_SHIFT 3 /* log2(8 bytes) */ + +int mmu_gva_to_gpa(pound::arm64::vcpu_state_t* vcpu, guest_memory_t* memory, uint64_t gva, uint64_t* out_gpa) +{ + const uint8_t SCTLR_EL1_M_BIT = (1 << 0); + if (0 == (vcpu->sctlr_el1 & SCTLR_EL1_M_BIT)) + { + *out_gpa = gva; + return 0; + } + + /* Extract T0SZ (bits [5:0]) and T1SZ (bits [21:16]) from TCR_EL1. + * Both are 6-bit fields. */ + const uint64_t TxSZ_WIDTH = 6; + const uint64_t TxSZ_MASK = (1ULL << TxSZ_WIDTH) - 1; + + const uint8_t T0SZ = vcpu->tcr_el1 & TxSZ_MASK; + const uint8_t T1SZ = (vcpu->tcr_el1 >> 16) & TxSZ_MASK; + + /* The virtual address size in bits. */ + uint8_t virtual_address_size = 0; + + bool is_ttbr0 = false; + bool is_ttbr1 = false; + + /* + * Before starting a page table walk, the hardware must perform two checks: + * 1. Classify the GVA as belonging to the lower half (user, TTBR0) or + * upper half (kernel, TTBR1) of the virtual address space. + * 2. Validate that the GVA is correct for the configured VA size. + * + * The size of the VA space is configured by the TxSZ fields in TCR_EL1. + * A TxSZ value of N implies a (64 - N)-bit address space. For any valid + * address in this space, the top N bits must be a sign-extension of + * bit (63 - N). + * + * For example, in a 48-bit space (TxSZ=16), bit 47 is the top bit.
+ * - For a lower-half address, bits [63:47] must all be 0. + * - For an upper-half address, bits [63:47] must all be 1. + * + * This sign-extension rule means that bit 63 will always have the same + * value as bit (63 - N) for any valid address. We can therefore use a + * simple check of bit 63 as an efficient shortcut to classify the + * address. The full canonical check that follows will then catch any + * invalid (non-sign-extended) addresses. + * + * Example Scenario: + * + * Kernel sets TCR_EL1.T0SZ = 16. This means it's using a 48-bit VA + * space (64 - 16 = 48). The top 16 bits of any valid user-space + * GVA must be 0. + * + * A GVA of 0x0001_0000_0000_0000 comes in. + * + * The top 16 bits are not all zero. An address translation fault is + * generated and the page table walk is aborted. + */ + if ((gva >> 63) & 1) + { + /* Address appears to be in the Upper (Kernel) Half */ + + virtual_address_size = 64 - T1SZ; + const uint64_t top_bits_mask = (~0ULL << virtual_address_size); + const uint64_t gva_tag = gva & top_bits_mask; + + if (gva_tag != top_bits_mask) + { + /* TODO(GloriousTacoo:memory): Generate address translation fault */ + return -1; + } + is_ttbr1 = true; + } + else + { + /* Address appears to be in the Lower (User) Half */ + + virtual_address_size = 64 - T0SZ; + const uint64_t top_bits_mask = (~0ULL << virtual_address_size); + if (0 != (gva & top_bits_mask)) + { + /* TODO(GloriousTacoo:memory): Generate address translation fault */ + return -1; + } + is_ttbr0 = true; + } + + /* + * The preceding logic determined which address space (and thus + * which TTBR) we're dealing with. Now we get the page size + * in bytes from the correct TGx field. + */ + uint64_t granule_size = 0; + assert((true == is_ttbr0) || (true == is_ttbr1)); + if (true == is_ttbr0) + { + /* + * We're in userspace. We need to decode TCR_EL1.TG0, which is + * at bits [15:14]. + * + * Encoding for TG0: + * 0b00: 4KB granule + * 0b01: 64KB granule + * 0b10: 16KB granule + * 0b11: Reserved, will cause a fault. + */ + const uint8_t TG0_SHIFT = 14; + const uint8_t TG0_MASK = 0b11; + const uint8_t TG0 = (vcpu->tcr_el1 >> TG0_SHIFT) & TG0_MASK; + switch (TG0) + { + case 0b00: + granule_size = GRANULE_4KB; + break; + case 0b01: + granule_size = GRANULE_64KB; + break; + case 0b10: + granule_size = GRANULE_16KB; + break; + default: + /* + * This is an illegal configuration. The hardware will fault. + * For now, an assert will catch bad guest OS behaviour. + */ + assert(!"Invalid TG0 value in TCR_EL1"); + } + } + else + { + /* + * We're in kernel space. We decode TCR_EL1.TG1, which is at + * bits [31:30]. Note that the encoding values are different + * from TG0. Don't get caught out. + * + * Encoding for TG1: + * 0b01: 16KB granule + * 0b10: 4KB granule + * 0b11: 64KB granule + * 0b00: Reserved, will cause a fault. + */ + const uint8_t TG1_SHIFT = 30; + const uint8_t TG1_MASK = 0b11; + const uint8_t TG1 = (vcpu->tcr_el1 >> TG1_SHIFT) & TG1_MASK; + switch (TG1) + { + case 0b01: + /* 16KB page size */ + granule_size = GRANULE_16KB; + break; + case 0b10: + /* 4KB page size */ + granule_size = GRANULE_4KB; + break; + case 0b11: + /* 64KB page size */ + granule_size = GRANULE_64KB; + break; + default: + assert(!"Invalid TG1 value in TCR_EL1"); + break; + } + } + + /* + * In hardware, everything is a power of two. A 4096-byte page isn't + * a magic number; it's 2^12. This means you need exactly 12 bits to + * address every single byte within that page.
+ * + * The naive way to get 12 from 4096 is to calculate log2(4096) but + * that's computationally expensive. A much faster way, and how the + * hardware thinks, is to find the position of the single set bit. + * + * 4096 in binary is: 0001 0000 0000 0000 (Bit 12 is set, followed + * by 12 zeroes). + * + * For a power of two, the number of trailing zeroes is its + * logarithm base 2. The COUNT_TRAILING_ZEROS() macro + * wraps a compiler intrinsic that typically boils down to + * a single CPU instruction (like TZCNT on x86). + */ + const uint8_t offset_bits = COUNT_TRAILING_ZEROS(granule_size); + + /* + * We now need to figure out how many bits are used for the index at each + * level of the page table. + * + * A page table is just a big array of 8-byte entries (descriptors). + * The table itself has to fit perfectly into a page of memory (a granule). + * So a 4KB page holds a 4KB table. + * + * The number of entries in that table is: Granule Size / Entry Size. + * For a 4KB granule: 4096 bytes / 8 bytes = 512 entries. + * + * To index an array of 512 entries we need 9 bits (since 2^9 = 512). + * + * log2(Num Entries) = log2(Granule Size / Entry Size) + * log2(Num Entries) = log2(Granule Size) - log2(Entry Size) + * + * We already have log2(Granule Size); that's our `offset_bits`. + * The `PAGE_TABLE_ENTRY_SHIFT` is a constant for log2(Entry Size). + * An entry is 8 bytes, and 8 is 2^3, so its log2 is 3. + * + * For a 4KB granule: + * 12 offset bits - 3 bits = 9 index bits. + * + */ + const uint8_t page_table_index_bits = offset_bits - PAGE_TABLE_ENTRY_SHIFT; + + /* + * Next we determine the page table starting level and walk depth based on the + * virtual address size. The intent is to find the highest table level required + * to map the address space. A larger VA size requires a deeper walk. + */ + const uint8_t l3_shift = offset_bits; + const uint8_t l2_shift = l3_shift + page_table_index_bits; + const uint8_t l1_shift = l2_shift + page_table_index_bits; + const uint8_t l0_shift = l1_shift + page_table_index_bits; + uint8_t page_table_levels = 0; + uint8_t starting_level = 0; + switch (granule_size) + { + case GRANULE_4KB: + /* A 4KB granule supports up to a 4-level walk starting at L0. */ + page_table_levels = 3; /* 0..3 inclusive */ + if (virtual_address_size > l0_shift) + { + starting_level = 0; + } + else if (virtual_address_size > l1_shift) + { + starting_level = 1; + } + else + { + starting_level = 2; + } + break; + case GRANULE_16KB: + case GRANULE_64KB: + /* 16KB and 64KB granules support up to a 3-level walk starting at L1. */ + page_table_levels = 3; /* 1..3 inclusive */ + if (virtual_address_size > l1_shift) + { + starting_level = 1; + } + else + { + starting_level = 2; + } + break; + default: + /* This granule size is not supported by the architecture. */ + return -1; + } + + uint64_t table_address = 0x0; + if (true == is_ttbr0) + { + table_address = vcpu->ttbr0_el1; + } + else + { + table_address = vcpu->ttbr1_el1; + } + + /* + * Begin the multi-level page table walk. + * + * The walk starts from the base address of the initial table (L0 or L1, + * depending on the VA size) and descends level by level. At each level, + * we extract an index from the GVA, use it to find a descriptor in the + * current table, and then interpret that descriptor. The descriptor + * will either point to the next table in the hierarchy, describe + * the final physical page (a page descriptor), or indicate a fault.
+ */ + uint64_t level_index = 0; + const uint64_t page_table_index_mask = (1ULL << page_table_index_bits) - 1; + const uint8_t page_table_entry_size = 8; + for (uint8_t level = starting_level; level <= page_table_levels; ++level) + { + switch (level) + { + case 0: + level_index = (gva >> l0_shift) & page_table_index_mask; + break; + case 1: + level_index = (gva >> l1_shift) & page_table_index_mask; + break; + case 2: + level_index = (gva >> l2_shift) & page_table_index_mask; + break; + case 3: + level_index = (gva >> l3_shift) & page_table_index_mask; + break; + default: + assert(!"Invalid page table configuration!"); + } + + const uint64_t level_entry_address = table_address + (level_index * page_table_entry_size); + const uint64_t descriptor = guest_mem_readq(memory, level_entry_address); + uint64_t offset_mask = (1ULL << offset_bits) - 1; + uint64_t page_offset = gva & offset_mask; + uint64_t page_address_mask = ~offset_mask; + + /* + * Is the descriptor valid? Bit [0] of every descriptor is the "valid" + * bit. If it's 0, the entry is invalid, and the mapping does not exist. + */ + if (0b0 == (descriptor & 0b1)) + { + // TODO(GloriousTacoo:arm64): generate page fault. + return -1; + } + /* + * At the final level, the only valid descriptor is a Page Descriptor, + * identified by bits [1:0] being 0b11. + */ + else if ((level == page_table_levels) && (0b11 == (descriptor & 0b11))) + { + /* + * The descriptor's upper bits [virtual_address_size:offset_bits] + * contain the physical base address of the page. We mask out + * the lower attribute bits to isolate this address. + */ + uint64_t page_base_address = descriptor & page_address_mask; + *out_gpa = page_base_address | page_offset; + return 0; + } + /* + * If this is not the final level, we expect a Table Descriptor, also + * identified by bits [1:0] being 0b11. This descriptor points to the + * table for the next level of the walk. + */ + else if (0b11 == (descriptor & 0b11)) + { + const uint64_t next_table_mask = ~((1ULL << offset_bits) - 1); + table_address = descriptor & next_table_mask; + } + /* + * If bits [1:0] are '01', it's a Block Descriptor. These descriptors + * terminate the walk early, mapping a large, contiguous block of + * memory (e.g., 2MB at L2). This implementation does not yet + * support them. + */ + else if (0b01 == (descriptor & 0b11)) + { + assert(!"Block descriptors are not supported"); + } + } + return -1; +} +} // namespace pound::arm64::memory diff --git a/core/arm64/mmu.h b/core/arm64/mmu.h new file mode 100644 index 0000000..8ab5246 --- /dev/null +++ b/core/arm64/mmu.h @@ -0,0 +1,40 @@ +#pragma once + +#include "isa.h" +#include "guest.h" + +namespace pound::arm64::memory +{ +/* + * mmu_gva_to_gpa() - Translate a Guest Virtual Address to a Guest Physical Address. + * @vcpu: A pointer to the vCPU state. + * @memory: A pointer to the guest's memory. + * @gva: The Guest Virtual Address to translate. + * @out_gpa: A pointer to a uint64_t where the resulting Guest Physical Address + * will be stored on success. + * + * This function is the primary entry point for the emulated AArch64 Stage 1 + * MMU. It is responsible for resolving a virtual address used by the guest + * into a physical address within the guest's physical address space. + * + * The translation behavior is dependent on the state of the emulated MMU, + * primarily controlled by the SCTLR_EL1.M bit (MMU enable). + * + * If the MMU is disabled, this function performs an identity mapping, where + * the GPA is identical to the GVA.
This correctly models the processor's + * behavior on reset and is the initial "stub" implementation. + * + * If the MMU is enabled, this function will perform a full, multi-level page + * table walk, starting from the base address in TTBR0_EL1 or TTBR1_EL1. It + * will parse translation table descriptors, check for permissions, and handle + * different page sizes as configured in TCR_EL1. + * + * A failed translation will result in a fault. The caller is responsible for + * checking the return value and initiating a synchronous exception if a fault + * occurs. The contents of @out_gpa are undefined on failure. + * + * Return: 0 on successful translation. A negative error code on a translation + * fault (e.g., for a page fault, permission error, or alignment fault). + */ +int mmu_gva_to_gpa(pound::arm64::vcpu_state_t* vcpu, guest_memory_t* memory, uint64_t gva, uint64_t* out_gpa); +} // namespace pound::arm64::memory
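Illustration only, not part of the patch: the shift arithmetic derived in the mmu.cpp comments, spelled out for the common 4KB-granule, 48-bit-VA case (TCR_EL1.T0SZ = 16). The constants below just re-compute the values the walk relies on.

/*
 * Sketch: level shifts for a 4KB granule, 48-bit VA space.
 */
#include <cassert>
#include <cstdint>

int main()
{
    const uint8_t offset_bits = 12;             /* log2(4096)                 */
    const uint8_t index_bits = offset_bits - 3; /* 512 entries per table -> 9 */

    const uint8_t l3_shift = offset_bits;           /* 12 */
    const uint8_t l2_shift = l3_shift + index_bits; /* 21 */
    const uint8_t l1_shift = l2_shift + index_bits; /* 30 */
    const uint8_t l0_shift = l1_shift + index_bits; /* 39 */

    /* A 48-bit VA is wider than l0_shift (39), so the walk starts at L0
     * and each of the four levels contributes 9 bits of table index. */
    assert(l2_shift == 21 && l1_shift == 30 && l0_shift == 39);
    assert(48 > l0_shift);
    return 0;
}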
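Illustration only, not part of the patch: one way a guest load path might consume the new API, combining mmu_gva_to_gpa() with the existing guest_mem_readq() helper. The wrapper name guest_load64 is hypothetical; the fault path is expected to end up in take_synchronous_exception() once ESR/FAR reporting is implemented.

/*
 * Hypothetical caller-side sketch for mmu_gva_to_gpa().
 */
#include "mmu.h"
#include "guest.h"

namespace pound::arm64::memory
{
static inline int guest_load64(pound::arm64::vcpu_state_t* vcpu,
                               guest_memory_t* memory,
                               uint64_t gva,
                               uint64_t* out_val)
{
    uint64_t gpa = 0;
    if (0 != mmu_gva_to_gpa(vcpu, memory, gva, &gpa))
    {
        /* Translation fault: the caller should raise a synchronous
         * Data Abort (EC_DATA_ABORT_LOWER_EL) rather than read memory. */
        return -1;
    }
    *out_val = guest_mem_readq(memory, gpa);
    return 0;
}
} // namespace pound::arm64::memory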