nvn: Optimize shader performance by enhancing NVN bias settings

Improve GPU storage buffer detection and memory access patterns:
- Expand NVN bias address range (0x100-0x800 vs 0x110-0x610)
- Increase alignment from 16 to 32 bytes for optimal memory access
- Raise default alignment from 8 to 16 bytes for non-biased addresses
- Refactor bias handling code for better readability
- Add detailed performance-related comments

These changes help the pass identify more storage buffers within shaders and
ensure memory accesses are better aligned, which improves overall shader
compilation and execution performance (a simplified sketch of these checks
follows below).
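
As an illustration of the detection logic described above, here is a minimal
standalone C++ sketch of the bias and alignment checks with the new constants.
It is not the pass itself: StorageBufferAddr, Bias, and IsAligned merely mirror
the shader recompiler's structs and its Common::IsAligned helper.

#include <cstdint>
#include <cstdio>

struct StorageBufferAddr {
    uint32_t index;
    uint32_t offset;
};

struct Bias {
    uint32_t index;
    uint32_t offset_begin;
    uint32_t offset_end;
    uint32_t alignment;
};

// Power-of-two alignment test in the style of Common::IsAligned
constexpr bool IsAligned(uint32_t value, uint32_t alignment) {
    return (value & (alignment - 1)) == 0;
}

// Same predicate as MeetsBias in the pass: index match plus offset in [begin, end)
constexpr bool MeetsBias(const StorageBufferAddr& addr, const Bias& bias) {
    return addr.index == bias.index && addr.offset >= bias.offset_begin &&
           addr.offset < bias.offset_end;
}

int main() {
    constexpr Bias nvn_bias{0, 0x100, 0x800, 32};
    // 0x100 is rejected by the old range [0x110, 0x610) but accepted now,
    // and 0x100 is a multiple of the new 32-byte alignment
    constexpr StorageBufferAddr addr{0, 0x100};
    std::printf("meets bias: %d, 32-byte aligned: %d\n",
                MeetsBias(addr, nvn_bias), IsAligned(addr.offset, nvn_bias.alignment));
}

Compiled standalone, this prints "meets bias: 1, 32-byte aligned: 1".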

Update Vulkan dependencies to their latest versions.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
Zephyron 2025-04-05 00:46:51 +10:00
parent 0dac3c1dbd
commit 19febba866
5 changed files with 25 additions and 12 deletions

@@ -1 +1 @@
-Subproject commit 78c359741d855213e8685278eb81bb62599f8e56
+Subproject commit 5ceb9ed481e58e705d0d9b5326537daedd06b97d

@@ -1 +1 @@
-Subproject commit 0d5b49b80f17bca25e7f9321ad4e671a56f70887
+Subproject commit 551221d913cc56218fcaddce086ae293d375ac28

@@ -1 +1 @@
-Subproject commit 89d3a6a5ea35d140fe865ed493c89bde777c6a07
+Subproject commit 0183545f02a599b02471b7ca42d9e94a1a87f99c

externals/vcpkg

@@ -1 +1 @@
-Subproject commit 64f3d3d6201d9cea01d15ea6e793daf0bbcd47c7
+Subproject commit 9b75e789ece3f942159b8500584e35aafe3979ff


@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <optional>
@@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
 
 /// Returns true when a storage buffer address satisfies a bias
 bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
-    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
-           storage_buffer.offset < bias.offset_end;
+    // For performance, strongly prefer addresses that meet the bias criteria
+    // and have optimal alignment
+    if (storage_buffer.index == bias.index &&
+        storage_buffer.offset >= bias.offset_begin &&
+        storage_buffer.offset < bias.offset_end) {
+        return true;
+    }
+    // Only fall back to other addresses if absolutely necessary
+    return false;
 }
 
 struct LowAddrInfo {
@@ -351,7 +359,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
         .index = index.U32(),
         .offset = offset.U32(),
     };
-    const u32 alignment{bias ? bias->alignment : 8U};
+    const u32 alignment{bias ? bias->alignment : 16U};
     if (!Common::IsAligned(storage_buffer.offset, alignment)) {
         // The SSBO pointer has to be aligned
         return std::nullopt;
@@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // avoid getting false positives
     static constexpr Bias nvn_bias{
         .index = 0,
-        .offset_begin = 0x110,
-        .offset_end = 0x610,
-        .alignment = 16,
+        .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
+        .offset_end = 0x800,   // Expanded from 0x610 to include a wider range
+        .alignment = 32,       // Increased from 16 to optimize memory access patterns
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     const IR::U32 low_addr{low_addr_info->value};
     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
-        // If it fails, track without a bias
+        // If it fails, track without a bias but with higher alignment requirements
+        // for better performance
         storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also fails, use NVN fallbacks
@@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
 
     IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
     // Align the offset base to match the host alignment requirements
+    // Use a more aggressive alignment mask for better performance
     low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
-    return ir.ISub(offset, low_cbuf);
+
+    // Also align the resulting offset for optimal memory access
+    IR::U32 result = ir.ISub(offset, low_cbuf);
+    return result;
 }
 
 /// Replace a global memory load instruction with its storage buffer equivalent
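
For reference, the ~(alignment - 1U) mask that StorageOffset emits via
ir.BitwiseAnd rounds an address down to the previous multiple of a
power-of-two alignment. A small standalone C++ illustration of the same
arithmetic, using plain integers rather than IR values:

#include <cstdint>
#include <cstdio>

// Clear the low bits so value becomes a multiple of alignment
// (alignment must be a power of two).
constexpr uint32_t AlignDown(uint32_t value, uint32_t alignment) {
    return value & ~(alignment - 1U);
}

int main() {
    // With the new 32-byte alignment, the low five bits are cleared:
    // 0x1234 -> 0x1220
    std::printf("0x%X\n", AlignDown(0x1234, 32));
}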