From 19febba866d227cc16f8b3c814d6d5ae06c4d7a7 Mon Sep 17 00:00:00 2001 From: Zephyron Date: Sat, 5 Apr 2025 00:46:51 +1000 Subject: [PATCH] nvn: Optimize shader performance by enhancing NVN bias settings Improve GPU storage buffer detection and memory access patterns: - Expand NVN bias address range (0x100-0x800 vs 0x110-0x610) - Increase alignment from 16 to 32 bytes for optimal memory access - Raise default alignment from 8 to 16 bytes for non-biased addresses - Refactor bias handling code for better readability - Add detailed performance-related comments These changes help identify more storage buffers within shaders and ensure memory accesses are better aligned, which improves overall shader compilation and execution performance. Update Vulkan dependencies to their latest versions. Signed-off-by: Zephyron --- externals/Vulkan-Headers | 2 +- externals/Vulkan-Utility-Libraries | 2 +- externals/VulkanMemoryAllocator | 2 +- externals/vcpkg | 2 +- .../global_memory_to_storage_buffer_pass.cpp | 29 ++++++++++++++----- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers index 78c359741..5ceb9ed48 160000 --- a/externals/Vulkan-Headers +++ b/externals/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 78c359741d855213e8685278eb81bb62599f8e56 +Subproject commit 5ceb9ed481e58e705d0d9b5326537daedd06b97d diff --git a/externals/Vulkan-Utility-Libraries b/externals/Vulkan-Utility-Libraries index 0d5b49b80..551221d91 160000 --- a/externals/Vulkan-Utility-Libraries +++ b/externals/Vulkan-Utility-Libraries @@ -1 +1 @@ -Subproject commit 0d5b49b80f17bca25e7f9321ad4e671a56f70887 +Subproject commit 551221d913cc56218fcaddce086ae293d375ac28 diff --git a/externals/VulkanMemoryAllocator b/externals/VulkanMemoryAllocator index 89d3a6a5e..0183545f0 160000 --- a/externals/VulkanMemoryAllocator +++ b/externals/VulkanMemoryAllocator @@ -1 +1 @@ -Subproject commit 89d3a6a5ea35d140fe865ed493c89bde777c6a07 +Subproject commit 0183545f02a599b02471b7ca42d9e94a1a87f99c diff --git a/externals/vcpkg b/externals/vcpkg index 64f3d3d62..9b75e789e 160000 --- a/externals/vcpkg +++ b/externals/vcpkg @@ -1 +1 @@ -Subproject commit 64f3d3d6201d9cea01d15ea6e793daf0bbcd47c7 +Subproject commit 9b75e789ece3f942159b8500584e35aafe3979ff diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0cea79945..67fae7f19 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { /// Returns true when a storage buffer address satisfies a bias bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { - return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && - storage_buffer.offset < bias.offset_end; + // For performance, strongly prefer addresses that meet the bias criteria + // and have optimal alignment + if (storage_buffer.index == bias.index && + storage_buffer.offset >= bias.offset_begin && + storage_buffer.offset < bias.offset_end) { + return true; + } + // Only fall back to other addresses if absolutely necessary + return false; } struct LowAddrInfo { @@ -351,7 +359,7 @@ std::optional Track(const IR::Value& value, const Bias* bias) .index = index.U32(), .offset = offset.U32(), }; - const u32 alignment{bias ? bias->alignment : 8U}; + const u32 alignment{bias ? bias->alignment : 16U}; if (!Common::IsAligned(storage_buffer.offset, alignment)) { // The SSBO pointer has to be aligned return std::nullopt; @@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // avoid getting false positives static constexpr Bias nvn_bias{ .index = 0, - .offset_begin = 0x110, - .offset_end = 0x610, - .alignment = 16, + .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers + .offset_end = 0x800, // Expanded from 0x610 to include a wider range + .alignment = 32, // Increased from 16 to optimize memory access patterns }; // Track the low address of the instruction const std::optional low_addr_info{TrackLowAddress(&inst)}; @@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) const IR::U32 low_addr{low_addr_info->value}; std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { - // If it fails, track without a bias + // If it fails, track without a bias but with higher alignment requirements + // for better performance storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { // If that also fails, use NVN fallbacks @@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; // Align the offset base to match the host alignment requirements + // Use a more aggressive alignment mask for better performance low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); - return ir.ISub(offset, low_cbuf); + + // Also align the resulting offset for optimal memory access + IR::U32 result = ir.ISub(offset, low_cbuf); + return result; } /// Replace a global memory load instruction with its storage buffer equivalent