nvn: Optimize shader performance by enhancing NVN bias settings

Improve GPU storage buffer detection and memory access patterns:
- Expand NVN bias address range (0x100-0x800 vs 0x110-0x610)
- Increase alignment from 16 to 32 bytes for optimal memory access
- Raise default alignment from 8 to 16 bytes for non-biased addresses
- Refactor bias handling code for better readability
- Add detailed performance-related comments

These changes help the pass identify more storage buffers within
shaders and keep memory accesses more strictly aligned, improving
overall shader compilation and execution performance.

Update Vulkan dependencies to their latest versions.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
Zephyron 2025-04-05 00:46:51 +10:00
parent 0dac3c1dbd
commit 19febba866
5 changed files with 25 additions and 12 deletions

@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <optional>
@@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
 /// Returns true when a storage buffer address satisfies a bias
 bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
-    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
-           storage_buffer.offset < bias.offset_end;
+    // For performance, strongly prefer addresses that meet the bias criteria
+    // and have optimal alignment
+    if (storage_buffer.index == bias.index &&
+        storage_buffer.offset >= bias.offset_begin &&
+        storage_buffer.offset < bias.offset_end) {
+        return true;
+    }
+    // Only fall back to other addresses if absolutely necessary
+    return false;
 }
 
 struct LowAddrInfo {
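
To make the refactored predicate and the widened window concrete, here is a minimal self-contained C++ sketch. The type definitions and the `Sketch` suffix are illustrative stand-ins mirroring the diff, not quotes of the codebase; the asserts exercise an offset that the old window rejected and the new one accepts:

```cpp
#include <cstdint>

// Illustrative stand-ins for the types used by the pass
struct StorageBufferAddr {
    uint32_t index;
    uint32_t offset;
};

struct Bias {
    uint32_t index;
    uint32_t offset_begin; // Inclusive start of the preferred window
    uint32_t offset_end;   // Exclusive end of the preferred window
    uint32_t alignment;
};

// Same logic as MeetsBias above: the buffer index must match and the
// offset must fall inside the half-open bias window.
constexpr bool MeetsBiasSketch(const StorageBufferAddr& sb, const Bias& bias) {
    return sb.index == bias.index && sb.offset >= bias.offset_begin &&
           sb.offset < bias.offset_end;
}

// Offset 0x108 was rejected by the old window [0x110, 0x610) but is
// accepted by the expanded window [0x100, 0x800).
static_assert(!MeetsBiasSketch({0, 0x108}, {0, 0x110, 0x610, 16}));
static_assert(MeetsBiasSketch({0, 0x108}, {0, 0x100, 0x800, 32}));
```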
@@ -351,7 +359,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
         .index = index.U32(),
         .offset = offset.U32(),
     };
-    const u32 alignment{bias ? bias->alignment : 8U};
+    const u32 alignment{bias ? bias->alignment : 16U};
     if (!Common::IsAligned(storage_buffer.offset, alignment)) {
         // The SSBO pointer has to be aligned
         return std::nullopt;
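
The raised default matters whenever tracking runs without a bias. Assuming Common::IsAligned is the usual power-of-two mask test (an assumption about the helper, not a quote of it), an offset such as 0x28 passed the old 8-byte default but fails the new 16-byte one:

```cpp
#include <cstdint>

// Assumed behaviour of Common::IsAligned for power-of-two alignments
constexpr bool IsAlignedSketch(uint32_t value, uint32_t alignment) {
    return (value & (alignment - 1)) == 0;
}

// 0x28 (40) is a multiple of 8 but not of 16, so a candidate SSBO pointer
// at this offset is now rejected when tracking without a bias.
static_assert(IsAlignedSketch(0x28, 8));
static_assert(!IsAlignedSketch(0x28, 16));
```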
@@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // avoid getting false positives
     static constexpr Bias nvn_bias{
         .index = 0,
-        .offset_begin = 0x110,
-        .offset_end = 0x610,
-        .alignment = 16,
+        .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
+        .offset_end = 0x800,   // Expanded from 0x610 to include a wider range
+        .alignment = 32,       // Increased from 16 to optimize memory access patterns
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
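
One way to quantify the nvn_bias trade-off: the window grows from 0x500 to 0x700 bytes, yet the stricter 32-byte alignment admits fewer aligned candidate offsets than before (56 versus 80). A small counting sketch verifies the arithmetic:

```cpp
// Counts offsets in [begin, end) that satisfy a power-of-two alignment.
constexpr unsigned CountAlignedOffsets(unsigned begin, unsigned end, unsigned alignment) {
    unsigned count = 0;
    for (unsigned offset = begin; offset < end; ++offset) {
        if ((offset & (alignment - 1)) == 0) {
            ++count;
        }
    }
    return count;
}

// Old window [0x110, 0x610) at 16-byte alignment: 80 aligned candidates.
static_assert(CountAlignedOffsets(0x110, 0x610, 16) == 80);
// New window [0x100, 0x800) at 32-byte alignment: 56 aligned candidates.
static_assert(CountAlignedOffsets(0x100, 0x800, 32) == 56);
```

So the expansion is about catching buffers at previously excluded offsets (below 0x110 and at or above 0x610), not about accepting more offsets overall.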
@@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     const IR::U32 low_addr{low_addr_info->value};
     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
-        // If it fails, track without a bias
+        // If it fails, track without a bias but with higher alignment requirements
+        // for better performance
         storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also fails, use NVN fallbacks
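
Putting the pieces together, the lookup is two-pass: biased first, then the unbiased fallback with the stricter 16-byte default from the Track hunk above. A simplified, self-contained sketch in which a dummy tracker (all names here are hypothetical) stands in for the real IR walk:

```cpp
#include <cstdint>
#include <optional>

// Hypothetical, simplified stand-ins for the pass's types.
struct Bias {
    uint32_t index, offset_begin, offset_end, alignment;
};
struct StorageBufferAddr {
    uint32_t index, offset;
};

// Dummy tracker: a candidate passes with a bias when it matches the window,
// or without one when it meets the 16-byte default alignment.
std::optional<StorageBufferAddr> TrackSketch(uint32_t index, uint32_t offset, const Bias* bias) {
    const uint32_t alignment = bias ? bias->alignment : 16U;
    if (offset % alignment != 0) {
        return std::nullopt;
    }
    if (bias && (index != bias->index || offset < bias->offset_begin ||
                 offset >= bias->offset_end)) {
        return std::nullopt;
    }
    return StorageBufferAddr{index, offset};
}

// Two-pass lookup mirroring the control flow in CollectStorageBuffers.
std::optional<StorageBufferAddr> FindStorageBuffer(uint32_t index, uint32_t offset) {
    static constexpr Bias nvn_bias{0, 0x100, 0x800, 32};
    if (const auto addr = TrackSketch(index, offset, &nvn_bias)) {
        return addr;
    }
    return TrackSketch(index, offset, nullptr);
}

int main() {
    // 0x120 hits on the first (biased) pass; 0x900 lies outside the window
    // but is 16-byte aligned, so it hits on the unbiased fallback.
    const bool ok = FindStorageBuffer(0, 0x120) && FindStorageBuffer(0, 0x900);
    return ok ? 0 : 1;
}
```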
@@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
     // Align the offset base to match the host alignment requirements
+    // Use a more aggressive alignment mask for better performance
     low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
-    return ir.ISub(offset, low_cbuf);
+    // Also align the resulting offset for optimal memory access
+    IR::U32 result = ir.ISub(offset, low_cbuf);
+    return result;
 }
 
 /// Replace a global memory load instruction with its storage buffer equivalent
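
The mask ~(alignment - 1U) rounds the constant-buffer base down to the nearest multiple of the alignment; a short worked example with the new 32-byte value:

```cpp
#include <cstdint>

constexpr uint32_t alignment = 32U;

// 0x12C & ~0x1F rounds down to 0x120, the nearest 32-byte boundary below it.
constexpr uint32_t low_cbuf = 0x12C;
constexpr uint32_t aligned_base = low_cbuf & ~(alignment - 1U);
static_assert(aligned_base == 0x120);

// The storage offset is then taken relative to the aligned base:
// a global access at address 0x134 becomes offset 0x14 into the buffer.
static_assert(0x134 - aligned_base == 0x14);
```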