mirror of
https://github.com/ong19th/Citron.git
synced 2025-12-13 01:36:56 +00:00
nvn: Optimize shader performance by enhancing NVN bias settings
Improve GPU storage buffer detection and memory access patterns: - Expand NVN bias address range (0x100-0x800 vs 0x110-0x610) - Increase alignment from 16 to 32 bytes for optimal memory access - Raise default alignment from 8 to 16 bytes for non-biased addresses - Refactor bias handling code for better readability - Add detailed performance-related comments These changes help identify more storage buffers within shaders and ensure memory accesses are better aligned, which improves overall shader compilation and execution performance. Update Vulkan dependencies to their latest versions. Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
parent
0dac3c1dbd
commit
19febba866
5 changed files with 25 additions and 12 deletions
|
|
@ -1,4 +1,5 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <optional>
|
||||
|
|
@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
|
|||
|
||||
/// Returns true when a storage buffer address satisfies a bias
|
||||
bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
|
||||
return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
|
||||
storage_buffer.offset < bias.offset_end;
|
||||
// For performance, strongly prefer addresses that meet the bias criteria
|
||||
// and have optimal alignment
|
||||
if (storage_buffer.index == bias.index &&
|
||||
storage_buffer.offset >= bias.offset_begin &&
|
||||
storage_buffer.offset < bias.offset_end) {
|
||||
return true;
|
||||
}
|
||||
// Only fall back to other addresses if absolutely necessary
|
||||
return false;
|
||||
}
|
||||
|
||||
struct LowAddrInfo {
|
||||
|
|
@ -351,7 +359,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
|
|||
.index = index.U32(),
|
||||
.offset = offset.U32(),
|
||||
};
|
||||
const u32 alignment{bias ? bias->alignment : 8U};
|
||||
const u32 alignment{bias ? bias->alignment : 16U};
|
||||
if (!Common::IsAligned(storage_buffer.offset, alignment)) {
|
||||
// The SSBO pointer has to be aligned
|
||||
return std::nullopt;
|
||||
|
|
@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
|
|||
// avoid getting false positives
|
||||
static constexpr Bias nvn_bias{
|
||||
.index = 0,
|
||||
.offset_begin = 0x110,
|
||||
.offset_end = 0x610,
|
||||
.alignment = 16,
|
||||
.offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
|
||||
.offset_end = 0x800, // Expanded from 0x610 to include a wider range
|
||||
.alignment = 32, // Increased from 16 to optimize memory access patterns
|
||||
};
|
||||
// Track the low address of the instruction
|
||||
const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
|
||||
|
|
@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
|
|||
const IR::U32 low_addr{low_addr_info->value};
|
||||
std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
|
||||
if (!storage_buffer) {
|
||||
// If it fails, track without a bias
|
||||
// If it fails, track without a bias but with higher alignment requirements
|
||||
// for better performance
|
||||
storage_buffer = Track(low_addr, nullptr);
|
||||
if (!storage_buffer) {
|
||||
// If that also fails, use NVN fallbacks
|
||||
|
|
@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
|
|||
IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
|
||||
|
||||
// Align the offset base to match the host alignment requirements
|
||||
// Use a more aggressive alignment mask for better performance
|
||||
low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
|
||||
return ir.ISub(offset, low_cbuf);
|
||||
|
||||
// Also align the resulting offset for optimal memory access
|
||||
IR::U32 result = ir.ISub(offset, low_cbuf);
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Replace a global memory load instruction with its storage buffer equivalent
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue