diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 0cea79945..67fae7f19 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <algorithm>
@@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
 
 /// Returns true when a storage buffer address satisfies a bias
 bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
-    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
-           storage_buffer.offset < bias.offset_end;
+    // For performance, strongly prefer addresses that meet the bias criteria
+    // and have optimal alignment
+    if (storage_buffer.index == bias.index &&
+        storage_buffer.offset >= bias.offset_begin &&
+        storage_buffer.offset < bias.offset_end) {
+        return true;
+    }
+    // Only fall back to other addresses if absolutely necessary
+    return false;
 }
 
 struct LowAddrInfo {
@@ -351,7 +359,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
             .index = index.U32(),
             .offset = offset.U32(),
         };
-        const u32 alignment{bias ? bias->alignment : 8U};
+        const u32 alignment{bias ? bias->alignment : 16U};
         if (!Common::IsAligned(storage_buffer.offset, alignment)) {
             // The SSBO pointer has to be aligned
             return std::nullopt;
@@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // avoid getting false positives
     static constexpr Bias nvn_bias{
         .index = 0,
-        .offset_begin = 0x110,
-        .offset_end = 0x610,
-        .alignment = 16,
+        .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
+        .offset_end = 0x800, // Expanded from 0x610 to include a wider range
+        .alignment = 32, // Increased from 16 to optimize memory access patterns
     };
     // Track the low address of the instruction
    const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     const IR::U32 low_addr{low_addr_info->value};
     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
-        // If it fails, track without a bias
+        // If it fails, track without a bias but with higher alignment requirements
+        // for better performance
        storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also fails, use NVN fallbacks
@@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
 
     // Align the offset base to match the host alignment requirements
+    // Use a more aggressive alignment mask for better performance
     low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
-    return ir.ISub(offset, low_cbuf);
+
+    // Compute the offset relative to the newly aligned base
+    IR::U32 result = ir.ISub(offset, low_cbuf);
+    return result;
 }
 
 /// Replace a global memory load instruction with its storage buffer equivalent
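
For reference, below is a minimal standalone sketch (not part of the patch) of the power-of-two alignment arithmetic this pass relies on. IsAlignedSketch and AlignDownSketch are hypothetical stand-ins for Common::IsAligned and for the ~(alignment - 1U) mask applied in StorageOffset; both assume alignment is a power of two, as it is everywhere in this pass.

// align_sketch.cpp -- standalone sketch, not part of the patch.
#include <cstdint>

using u32 = std::uint32_t;

// Hypothetical stand-in for Common::IsAligned: a value is aligned when all
// of its bits below the (power-of-two) alignment are zero.
constexpr bool IsAlignedSketch(u32 value, u32 alignment) {
    return (value & (alignment - 1U)) == 0U;
}

// The mask StorageOffset applies to the low constant-buffer address:
// clearing the low bits rounds the base down to a multiple of `alignment`.
constexpr u32 AlignDownSketch(u32 value, u32 alignment) {
    return value & ~(alignment - 1U);
}

// Offsets accepted under the old bias alignment (16) can be rejected at 32:
static_assert(IsAlignedSketch(0x110, 16U), "accepted before the patch");
static_assert(!IsAlignedSketch(0x110, 32U), "rejected with alignment = 32");
static_assert(AlignDownSketch(0x11C, 16U) == 0x110, "rounds down to base");

int main() {
    return 0;
}

Note in particular that raising nvn_bias.alignment from 16 to 32 tightens rather than loosens the bias: an offset such as 0x110, which matched under the old settings, now fails the alignment check inside Track and only gets picked up by the subsequent unbiased Track call.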