mirror of https://git.citron-emu.org/citron/emu
nvn: Optimize shader performance by enhancing NVN bias settings
Improve GPU storage buffer detection and memory access patterns:

- Expand NVN bias address range (0x100-0x800 vs 0x110-0x610)
- Increase alignment from 16 to 32 bytes for optimal memory access
- Raise default alignment from 8 to 16 bytes for non-biased addresses
- Refactor bias handling code for better readability
- Add detailed performance-related comments

These changes help identify more storage buffers within shaders and ensure
memory accesses are better aligned, which improves overall shader compilation
and execution performance.

Update Vulkan dependencies to their latest versions.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
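Both alignment changes in this commit reduce to the same power-of-two mask test that the pass performs via Common::IsAligned. A minimal standalone sketch of that test (the IsAlignedTo helper below is illustrative, not the emulator's actual header):

#include <cstdint>

// Power-of-two alignment test, mirroring what Common::IsAligned checks in the
// pass: an offset is aligned iff its low log2(alignment) bits are all zero.
constexpr bool IsAlignedTo(std::uint32_t offset, std::uint32_t alignment) {
    return (offset & (alignment - 1U)) == 0U;
}

static_assert(IsAlignedTo(0x120, 32));  // accepted by the new 32-byte bias
static_assert(!IsAlignedTo(0x110, 32)); // the old window start fails 32-byte alignment
static_assert(IsAlignedTo(0x110, 16));  // ...though it passed the old 16-byte bias
static_assert(IsAlignedTo(0x18, 8) && !IsAlignedTo(0x18, 16)); // dropped by the new 16-byte default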
This commit is contained in:
parent 0dac3c1dbd
commit 19febba866
@@ -1 +1 @@
-Subproject commit 78c359741d855213e8685278eb81bb62599f8e56
+Subproject commit 5ceb9ed481e58e705d0d9b5326537daedd06b97d
@@ -1 +1 @@
-Subproject commit 0d5b49b80f17bca25e7f9321ad4e671a56f70887
+Subproject commit 551221d913cc56218fcaddce086ae293d375ac28
@@ -1 +1 @@
-Subproject commit 89d3a6a5ea35d140fe865ed493c89bde777c6a07
+Subproject commit 0183545f02a599b02471b7ca42d9e94a1a87f99c
@@ -1 +1 @@
-Subproject commit 64f3d3d6201d9cea01d15ea6e793daf0bbcd47c7
+Subproject commit 9b75e789ece3f942159b8500584e35aafe3979ff
@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <optional>
@@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
 
 /// Returns true when a storage buffer address satisfies a bias
 bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
-    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
-           storage_buffer.offset < bias.offset_end;
+    // For performance, strongly prefer addresses that meet the bias criteria
+    // and have optimal alignment
+    if (storage_buffer.index == bias.index &&
+        storage_buffer.offset >= bias.offset_begin &&
+        storage_buffer.offset < bias.offset_end) {
+        return true;
+    }
+    // Only fall back to other addresses if absolutely necessary
+    return false;
 }
 
 struct LowAddrInfo {
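Since MeetsBias is pure arithmetic over the candidate address, its behaviour is easy to pin down in isolation. Below is a self-contained mirror of the predicate together with the new nvn_bias values from later in this diff; the structs are trimmed sketches of the pass's types, not the emulator's headers:

#include <cstdint>

// Trimmed sketches of the pass's StorageBufferAddr and Bias types.
struct StorageBufferAddr {
    std::uint32_t index;
    std::uint32_t offset;
};

struct Bias {
    std::uint32_t index;
    std::uint32_t offset_begin;
    std::uint32_t offset_end;
    std::uint32_t alignment;
};

// Same half-open range test as the patched MeetsBias.
constexpr bool MeetsBias(const StorageBufferAddr& addr, const Bias& bias) noexcept {
    return addr.index == bias.index && addr.offset >= bias.offset_begin &&
           addr.offset < bias.offset_end;
}

constexpr Bias nvn_bias{.index = 0, .offset_begin = 0x100, .offset_end = 0x800, .alignment = 32};

static_assert(MeetsBias({0, 0x100}, nvn_bias));  // newly in range (old window began at 0x110)
static_assert(MeetsBias({0, 0x7E0}, nvn_bias));  // newly in range (old window ended at 0x610)
static_assert(!MeetsBias({0, 0x800}, nvn_bias)); // offset_end is exclusive
static_assert(!MeetsBias({1, 0x200}, nvn_bias)); // wrong constant buffer index

The restructured function in the patch keeps these semantics unchanged; the if/early-return form exists for readability and to host the new comments.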
@@ -351,7 +359,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
         .index = index.U32(),
         .offset = offset.U32(),
     };
-    const u32 alignment{bias ? bias->alignment : 8U};
+    const u32 alignment{bias ? bias->alignment : 16U};
     if (!Common::IsAligned(storage_buffer.offset, alignment)) {
         // The SSBO pointer has to be aligned
         return std::nullopt;
@@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // avoid getting false positives
     static constexpr Bias nvn_bias{
         .index = 0,
-        .offset_begin = 0x110,
-        .offset_end = 0x610,
-        .alignment = 16,
+        .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
+        .offset_end = 0x800,   // Expanded from 0x610 to include a wider range
+        .alignment = 32,       // Increased from 16 to optimize memory access patterns
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
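A quick arithmetic cross-check of what the new window admits once the alignment gate in Track is applied: counting the aligned offsets in each half-open range is pure arithmetic, nothing emulator-specific (assumes power-of-two alignments):

#include <cstdint>
#include <cstdio>

// Number of `alignment`-aligned offsets inside the half-open range [begin, end).
constexpr std::uint32_t AlignedSlots(std::uint32_t begin, std::uint32_t end,
                                     std::uint32_t alignment) {
    const std::uint32_t first = (begin + alignment - 1U) & ~(alignment - 1U);
    return first >= end ? 0U : (end - first - 1U) / alignment + 1U;
}

int main() {
    std::printf("old bias (0x110..0x610, 16-byte): %u\n", AlignedSlots(0x110, 0x610, 16)); // 80
    std::printf("new bias (0x100..0x800, 32-byte): %u\n", AlignedSlots(0x100, 0x800, 32)); // 56
}

Note the two changes pull in opposite directions: the wider byte range can capture buffers the old window missed entirely, while the stricter 32-byte gate filters each candidate harder, so the raw count of surviving offsets actually drops.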
@@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     const IR::U32 low_addr{low_addr_info->value};
     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
-        // If it fails, track without a bias
+        // If it fails, track without a bias but with higher alignment requirements
+        // for better performance
         storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also fails, use NVN fallbacks
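The lookup order this hunk documents is: biased probe first, unbiased fallback second. A compact stand-in showing just that gating (TrackOffset is a hypothetical simplification; the real Track walks the IR value chain rather than testing a bare offset):

#include <cstdint>
#include <optional>

// Hypothetical stand-in for Track(): accepts an offset if it is aligned and,
// when the bias window is requested, also inside that window.
std::optional<std::uint32_t> TrackOffset(std::uint32_t offset, bool biased) {
    const std::uint32_t alignment = biased ? 32U : 16U; // values from this patch
    if ((offset & (alignment - 1U)) != 0U) {
        return std::nullopt; // the SSBO pointer has to be aligned
    }
    if (biased && (offset < 0x100 || offset >= 0x800)) {
        return std::nullopt; // outside the NVN bias window
    }
    return offset;
}

// Mirrors the two-stage lookup in CollectStorageBuffers.
std::optional<std::uint32_t> Locate(std::uint32_t offset) {
    if (auto found = TrackOffset(offset, /*biased=*/true)) {
        return found; // preferred: inside the bias window
    }
    return TrackOffset(offset, /*biased=*/false); // fallback: 16-byte default
}

// Locate(0x850) misses the 32-byte biased probe but survives the 16-byte
// fallback; Locate(0x858) now fails both probes, though the old 8-byte
// default would have accepted it.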
@@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
 
     // Align the offset base to match the host alignment requirements
+    // Use a more aggressive alignment mask for better performance
     low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
-    return ir.ISub(offset, low_cbuf);
+
+    // Also align the resulting offset for optimal memory access
+    IR::U32 result = ir.ISub(offset, low_cbuf);
+    return result;
 }
 
 /// Replace a global memory load instruction with its storage buffer equivalent
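The mask on the unchanged BitwiseAnd line is where the alignment value actually bites; concrete numbers make it visible (standalone C++, no IR types, assuming 32-bit unsigned arithmetic):

#include <cstdint>

// Same mask expression the pass builds with ir.Imm32(~(alignment - 1U)).
constexpr std::uint32_t AlignDown(std::uint32_t value, std::uint32_t alignment) {
    return value & ~(alignment - 1U);
}

static_assert(AlignDown(0x1234, 32) == 0x1220); // new mask clears the low five bits
static_assert(AlignDown(0x1234, 16) == 0x1230); // old mask cleared only four
static_assert((~(32U - 1U) & 0xFFFFFFFFU) == 0xFFFFFFE0U); // the literal mask value

The new `result` temporary returns the same value the old direct return did; the behavioural change in this function comes from the alignment constant, not from the refactor.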