video_core: Improve texture cache memory management to prevent leaks

Implement several improvements to the texture cache memory management system
to address memory leaks that occur in memory-intensive games like TOTK
(Title ID 0100F2C0115B6000). These changes prevent the gradual memory
increase that eventually leads to crashes or undefined behavior.

Key improvements:
- Enhance garbage collection with more aggressive cleanup thresholds
- Add an emergency cleanup in TickFrame that force-destroys all pending
  delayed resources after more than 120 consecutive frames above the
  critical memory threshold
- Make DeleteImage log inconsistencies (missing allocation entry, unexpected
  alias/overlap removal counts) instead of asserting on them
- Make DelayedDestructionRing thread-safe with proper mutex protection
- Track consecutive high-memory frames to detect potential leaks
- Add a last-resort garbage collector sweep that runs when usage is still at
  least 50 MiB above the critical threshold after the aggressive pass
- Use proper type casting in std::max to fix compilation errors

This should significantly improve stability during extended gameplay
sessions with memory-intensive titles.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
Zephyron 2025-04-20 17:39:14 +10:00
parent e72d695115
commit ff9c61e7c7
2 changed files with 135 additions and 15 deletions

View File

@ -1,12 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-FileCopyrightText: 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <array> #include <array>
#include <cstddef> #include <cstddef>
#include <mutex>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "common/logging/log.h"
namespace VideoCommon { namespace VideoCommon {
@ -14,18 +17,59 @@ namespace VideoCommon {
template <typename T, size_t TICKS_TO_DESTROY> template <typename T, size_t TICKS_TO_DESTROY>
class DelayedDestructionRing { class DelayedDestructionRing {
public: public:
DelayedDestructionRing() = default;
~DelayedDestructionRing() {
// Ensure all resources are properly released when ring is destroyed
for (auto& element_list : elements) {
element_list.clear();
}
}
void Tick() { void Tick() {
std::scoped_lock lock{ring_mutex};
// Move to next position in the ring
index = (index + 1) % TICKS_TO_DESTROY; index = (index + 1) % TICKS_TO_DESTROY;
// Clear elements at current position, which ensures resources are properly released
const size_t count = elements[index].size();
if (count > 0) {
// If more than a threshold of elements are being destroyed at once, log it
if (count > 100) {
LOG_DEBUG(Render_Vulkan, "Destroying {} delayed objects", count);
}
elements[index].clear(); elements[index].clear();
} }
}
void Push(T&& object) { void Push(T&& object) {
std::scoped_lock lock{ring_mutex};
elements[index].push_back(std::move(object)); elements[index].push_back(std::move(object));
} }
// Force immediate destruction of all resources (for emergency cleanup)
void ForceDestroyAll() {
std::scoped_lock lock{ring_mutex};
for (auto& element_list : elements) {
element_list.clear();
}
LOG_INFO(Render_Vulkan, "Force destroyed all delayed objects");
}
// Get current number of pending resources awaiting destruction
size_t GetPendingCount() const {
std::scoped_lock lock{ring_mutex};
size_t count = 0;
for (const auto& element_list : elements) {
count += element_list.size();
}
return count;
}
private: private:
size_t index = 0; size_t index = 0;
std::array<std::vector<T>, TICKS_TO_DESTROY> elements; std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
mutable std::mutex ring_mutex;
}; };
} // namespace VideoCommon } // namespace VideoCommon

View File

@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-FileCopyrightText: 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
#pragma once #pragma once
@ -80,8 +81,10 @@ void TextureCache<P>::RunGarbageCollector() {
const auto Configure = [&](bool allow_aggressive) { const auto Configure = [&](bool allow_aggressive) {
high_priority_mode = total_used_memory >= expected_memory; high_priority_mode = total_used_memory >= expected_memory;
aggressive_mode = allow_aggressive && total_used_memory >= critical_memory; aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; // Reduce ticks_to_destroy to be more aggressive in freeing memory
num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); ticks_to_destroy = aggressive_mode ? 5ULL : high_priority_mode ? 15ULL : 40ULL;
// Increase num_iterations to clean up more resources at once for memory-intensive games
num_iterations = aggressive_mode ? 60 : (high_priority_mode ? 30 : 15);
}; };
const auto Cleanup = [this, &num_iterations, &high_priority_mode, const auto Cleanup = [this, &num_iterations, &high_priority_mode,
&aggressive_mode](ImageId image_id) { &aggressive_mode](ImageId image_id) {
@ -95,7 +98,8 @@ void TextureCache<P>::RunGarbageCollector() {
// used by the async decoder thread. // used by the async decoder thread.
return false; return false;
} }
if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) { // Be more aggressive with cleanup for memory-intensive games
if (!aggressive_mode && !high_priority_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
return false; return false;
} }
const bool must_download = const bool must_download =
@ -118,19 +122,20 @@ void TextureCache<P>::RunGarbageCollector() {
DeleteImage(image_id, image.scale_tick > frame_tick + 5); DeleteImage(image_id, image.scale_tick > frame_tick + 5);
if (total_used_memory < critical_memory) { if (total_used_memory < critical_memory) {
if (aggressive_mode) { if (aggressive_mode) {
// Sink the aggresiveness. // Sink the aggresiveness more gradually to prevent oscillation
num_iterations >>= 2; num_iterations = num_iterations * 3 / 4;
aggressive_mode = false; aggressive_mode = false;
return false; return false;
} }
if (high_priority_mode && total_used_memory < expected_memory) { if (high_priority_mode && total_used_memory < expected_memory) {
num_iterations >>= 1; num_iterations = num_iterations * 3 / 4;
high_priority_mode = false; high_priority_mode = false;
} }
} }
return false; return false;
}; };
// Run garbage collection more frequently for memory-intensive games
// Try to remove anything old enough and not high priority. // Try to remove anything old enough and not high priority.
Configure(false); Configure(false);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
@ -138,19 +143,67 @@ void TextureCache<P>::RunGarbageCollector() {
// If pressure is still too high, prune aggressively. // If pressure is still too high, prune aggressively.
if (total_used_memory >= critical_memory) { if (total_used_memory >= critical_memory) {
Configure(true); Configure(true);
// Make a more thorough sweep with more aggressive settings
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy / 2, Cleanup);
// If we're still in a critical memory situation, do emergency cleanup
if (total_used_memory >= critical_memory + 50_MiB) {
// Last resort emergency cleanup - reduce thresholds dramatically
ticks_to_destroy = 1;
num_iterations = 100;
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
} }
} }
}
template <class P> template <class P>
void TextureCache<P>::TickFrame() { void TextureCache<P>::TickFrame() {
static u64 consecutive_high_memory_frames = 0;
static constexpr u64 EMERGENCY_CLEANUP_THRESHOLD = 120; // ~2 seconds at 60 FPS
// If we can obtain the memory info, use it instead of the estimate. // If we can obtain the memory info, use it instead of the estimate.
if (runtime.CanReportMemoryUsage()) { if (runtime.CanReportMemoryUsage()) {
total_used_memory = runtime.GetDeviceMemoryUsage(); total_used_memory = runtime.GetDeviceMemoryUsage();
} }
// Track consecutive high memory frames to detect potential leaks
if (total_used_memory > critical_memory) {
consecutive_high_memory_frames++;
if (consecutive_high_memory_frames > EMERGENCY_CLEANUP_THRESHOLD) {
// Emergency situation - extreme memory pressure for extended time
// This likely indicates a leak or insufficient cleanup
LOG_WARNING(Render, "Emergency texture cache cleanup triggered after {} frames of high memory usage",
consecutive_high_memory_frames);
// Force immediate cleanup of all pending resources
sentenced_images.ForceDestroyAll();
sentenced_framebuffers.ForceDestroyAll();
sentenced_image_view.ForceDestroyAll();
// Do a forced garbage collection pass
bool saved_value = has_deleted_images;
RunGarbageCollector();
has_deleted_images = saved_value;
// Reset counter but keep some pressure
consecutive_high_memory_frames = 30;
}
else if (consecutive_high_memory_frames > 60) { // If high memory for >60 frames (~1 second)
// Force a more aggressive cleanup cycle
RunGarbageCollector();
consecutive_high_memory_frames = 45; // Reset but keep some pressure
}
} else if (total_used_memory > expected_memory) {
// Use u64(1) to ensure type compatibility, avoiding the ULL suffix
consecutive_high_memory_frames = std::max(u64(1), consecutive_high_memory_frames / 2);
} else {
consecutive_high_memory_frames = 0;
}
if (total_used_memory > minimum_memory) { if (total_used_memory > minimum_memory) {
RunGarbageCollector(); RunGarbageCollector();
} }
sentenced_images.Tick(); sentenced_images.Tick();
sentenced_framebuffers.Tick(); sentenced_framebuffers.Tick();
sentenced_image_view.Tick(); sentenced_image_view.Tick();
@ -2165,27 +2218,35 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
if (image.HasScaled()) { if (image.HasScaled()) {
total_used_memory -= GetScaledImageSizeBytes(image); total_used_memory -= GetScaledImageSizeBytes(image);
} }
// Calculate accurate memory usage for this image
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) && if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) || True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) { True(image.flags & ImageFlagBits::Converted)) {
tentative_size = TranscodedAstcSize(tentative_size, image.info.format); tentative_size = TranscodedAstcSize(tentative_size, image.info.format);
} }
// Ensure memory usage is properly accounted for
total_used_memory -= Common::AlignUp(tentative_size, 1024); total_used_memory -= Common::AlignUp(tentative_size, 1024);
const GPUVAddr gpu_addr = image.gpu_addr; const GPUVAddr gpu_addr = image.gpu_addr;
const auto alloc_it = image_allocs_table.find(gpu_addr); const auto alloc_it = image_allocs_table.find(gpu_addr);
if (alloc_it == image_allocs_table.end()) { if (alloc_it == image_allocs_table.end()) {
ASSERT_MSG(false, "Trying to delete an image alloc that does not exist in address 0x{:x}", LOG_ERROR(HW_GPU, "Trying to delete an image alloc that does not exist in address 0x{:x}",
gpu_addr); gpu_addr);
return; return;
} }
const ImageAllocId alloc_id = alloc_it->second; const ImageAllocId alloc_id = alloc_it->second;
std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
const auto alloc_image_it = std::ranges::find(alloc_images, image_id); const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
if (alloc_image_it == alloc_images.end()) { if (alloc_image_it == alloc_images.end()) {
ASSERT_MSG(false, "Trying to delete an image that does not exist"); LOG_ERROR(HW_GPU, "Trying to delete an image that does not exist");
return; return;
} }
// Ensure image is properly untracked and unregistered before deletion
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
@ -2196,6 +2257,8 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
for (size_t rt = 0; rt < NUM_RT; ++rt) { for (size_t rt = 0; rt < NUM_RT; ++rt) {
dirty[Dirty::ColorBuffer0 + rt] = true; dirty[Dirty::ColorBuffer0 + rt] = true;
} }
// Clear render target references
const std::span<const ImageViewId> image_view_ids = image.image_view_ids; const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
for (const ImageViewId image_view_id : image_view_ids) { for (const ImageViewId image_view_id : image_view_ids) {
std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
@ -2203,9 +2266,12 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
render_targets.depth_buffer_id = ImageViewId{}; render_targets.depth_buffer_id = ImageViewId{};
} }
} }
// Clean up references and dependencies
RemoveImageViewReferences(image_view_ids); RemoveImageViewReferences(image_view_ids);
RemoveFramebuffers(image_view_ids); RemoveFramebuffers(image_view_ids);
// Handle aliased images
for (const AliasedImage& alias : image.aliased_images) { for (const AliasedImage& alias : image.aliased_images) {
ImageBase& other_image = slot_images[alias.id]; ImageBase& other_image = slot_images[alias.id];
[[maybe_unused]] const size_t num_removed_aliases = [[maybe_unused]] const size_t num_removed_aliases =
@ -2213,33 +2279,43 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
return other_alias.id == image_id; return other_alias.id == image_id;
}); });
other_image.CheckAliasState(); other_image.CheckAliasState();
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", if (num_removed_aliases != 1) {
num_removed_aliases); LOG_WARNING(HW_GPU, "Invalid number of removed aliases: {}", num_removed_aliases);
} }
}
// Handle overlapping images
for (const ImageId overlap_id : image.overlapping_images) { for (const ImageId overlap_id : image.overlapping_images) {
ImageBase& other_image = slot_images[overlap_id]; ImageBase& other_image = slot_images[overlap_id];
[[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
other_image.overlapping_images, other_image.overlapping_images,
[image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
other_image.CheckBadOverlapState(); other_image.CheckBadOverlapState();
ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", if (num_removed_overlaps != 1) {
num_removed_overlaps); LOG_WARNING(HW_GPU, "Invalid number of removed overlaps: {}", num_removed_overlaps);
} }
}
// Free resources - either immediately or queue for delayed destruction
for (const ImageViewId image_view_id : image_view_ids) { for (const ImageViewId image_view_id : image_view_ids) {
if (!immediate_delete) { if (!immediate_delete) {
sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
} }
slot_image_views.erase(image_view_id); slot_image_views.erase(image_view_id);
} }
if (!immediate_delete) { if (!immediate_delete) {
sentenced_images.Push(std::move(slot_images[image_id])); sentenced_images.Push(std::move(slot_images[image_id]));
} }
slot_images.erase(image_id); slot_images.erase(image_id);
// Clean up allocation table
alloc_images.erase(alloc_image_it); alloc_images.erase(alloc_image_it);
if (alloc_images.empty()) { if (alloc_images.empty()) {
image_allocs_table.erase(alloc_it); image_allocs_table.erase(alloc_it);
} }
// Mark tables as invalidated
for (size_t c : active_channel_ids) { for (size_t c : active_channel_ids) {
auto& channel_info = channel_storage[c]; auto& channel_info = channel_storage[c];
if constexpr (ENABLE_VALIDATION) { if constexpr (ENABLE_VALIDATION) {