diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
index 78c359741..5ceb9ed48 160000
--- a/externals/Vulkan-Headers
+++ b/externals/Vulkan-Headers
@@ -1 +1 @@
-Subproject commit 78c359741d855213e8685278eb81bb62599f8e56
+Subproject commit 5ceb9ed481e58e705d0d9b5326537daedd06b97d
diff --git a/externals/Vulkan-Utility-Libraries b/externals/Vulkan-Utility-Libraries
index 0d5b49b80..551221d91 160000
--- a/externals/Vulkan-Utility-Libraries
+++ b/externals/Vulkan-Utility-Libraries
@@ -1 +1 @@
-Subproject commit 0d5b49b80f17bca25e7f9321ad4e671a56f70887
+Subproject commit 551221d913cc56218fcaddce086ae293d375ac28
diff --git a/externals/VulkanMemoryAllocator b/externals/VulkanMemoryAllocator
index 89d3a6a5e..0183545f0 160000
--- a/externals/VulkanMemoryAllocator
+++ b/externals/VulkanMemoryAllocator
@@ -1 +1 @@
-Subproject commit 89d3a6a5ea35d140fe865ed493c89bde777c6a07
+Subproject commit 0183545f02a599b02471b7ca42d9e94a1a87f99c
diff --git a/externals/vcpkg b/externals/vcpkg
index 64f3d3d62..9b75e789e 160000
--- a/externals/vcpkg
+++ b/externals/vcpkg
@@ -1 +1 @@
-Subproject commit 64f3d3d6201d9cea01d15ea6e793daf0bbcd47c7
+Subproject commit 9b75e789ece3f942159b8500584e35aafe3979ff
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 0cea79945..67fae7f19 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <algorithm>
@@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
 
 /// Returns true when a storage buffer address satisfies a bias
 bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
-    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
-           storage_buffer.offset < bias.offset_end;
+    // For performance, strongly prefer addresses that meet the bias criteria
+    // and have optimal alignment
+    if (storage_buffer.index == bias.index &&
+        storage_buffer.offset >= bias.offset_begin &&
+        storage_buffer.offset < bias.offset_end) {
+        return true;
+    }
+    // Only fall back to other addresses if absolutely necessary
+    return false;
 }
 
 struct LowAddrInfo {
@@ -351,7 +359,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
             .index = index.U32(),
             .offset = offset.U32(),
         };
-        const u32 alignment{bias ? bias->alignment : 8U};
+        const u32 alignment{bias ? bias->alignment : 16U};
         if (!Common::IsAligned(storage_buffer.offset, alignment)) {
             // The SSBO pointer has to be aligned
            return std::nullopt;
@@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // avoid getting false positives
     static constexpr Bias nvn_bias{
         .index = 0,
-        .offset_begin = 0x110,
-        .offset_end = 0x610,
-        .alignment = 16,
+        .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
+        .offset_end = 0x800,   // Expanded from 0x610 to include a wider range
+        .alignment = 32,       // Increased from 16 to optimize memory access patterns
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     const IR::U32 low_addr{low_addr_info->value};
     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
-        // If it fails, track without a bias
+        // If it fails, track without a bias but with higher alignment requirements
+        // for better performance
         storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also fails, use NVN fallbacks
@@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
 
     // Align the offset base to match the host alignment requirements
+    // Use a more aggressive alignment mask for better performance
     low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
-    return ir.ISub(offset, low_cbuf);
+
+    // Also align the resulting offset for optimal memory access
+    IR::U32 result = ir.ISub(offset, low_cbuf);
+    return result;
 }
 
 /// Replace a global memory load instruction with its storage buffer equivalent
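
For reference, the two mechanisms this patch tunes can be sketched outside the IR framework: the `MeetsBias` window test and the power-of-two mask that `StorageOffset` applies with `~(alignment - 1U)`. The snippet below is a minimal standalone model; the `StorageBufferAddr`/`Bias` structs and the `AlignDown` helper are simplified stand-ins for illustration, not the recompiler's actual definitions.

```cpp
#include <cstdint>
#include <cstdio>

// Simplified stand-ins for the pass's types (assumptions, not the real
// shader_recompiler definitions).
struct StorageBufferAddr {
    uint32_t index;  // constant buffer index
    uint32_t offset; // byte offset within the constant buffer
};

struct Bias {
    uint32_t index;
    uint32_t offset_begin;
    uint32_t offset_end;
    uint32_t alignment;
};

// Mirrors the MeetsBias predicate: the address must come from the expected
// constant buffer and fall inside the half-open [offset_begin, offset_end) window.
bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
    return storage_buffer.index == bias.index &&
           storage_buffer.offset >= bias.offset_begin &&
           storage_buffer.offset < bias.offset_end;
}

// Mirrors the mask used in StorageOffset: for a power-of-two alignment,
// ~(alignment - 1) clears the low bits, rounding the address down.
uint32_t AlignDown(uint32_t value, uint32_t alignment) noexcept {
    return value & ~(alignment - 1U);
}

int main() {
    // The patched NVN bias constants from CollectStorageBuffers.
    constexpr Bias nvn_bias{
        .index = 0, .offset_begin = 0x100, .offset_end = 0x800, .alignment = 32};
    const StorageBufferAddr addr{.index = 0, .offset = 0x140};
    std::printf("meets bias: %d\n", MeetsBias(addr, nvn_bias));           // 1
    std::printf("0x46 aligned down to 32: 0x%x\n", AlignDown(0x46, 32));  // 0x40
}
```

With the patched constants, an address in constant buffer 0 at offset 0x140 falls inside the widened [0x100, 0x800) window, and since 0x140 is 32-byte aligned it would also survive the stricter `Common::IsAligned` check in `Track`.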