mirror of https://git.citron-emu.org/citron/emu
video_core: Improve texture cache memory management to prevent leaks
Implement several improvements to the texture cache memory management system to address memory leaks that occur in memory-intensive games like TOTK (Title ID 0100F2C0115B6000). These changes prevent the gradual memory increase that eventually leads to crashes or undefined behavior.

Key improvements:
- Enhance garbage collection with more aggressive cleanup thresholds
- Add emergency resource cleanup for persistent high memory usage
- Improve DeleteImage to ensure proper resource deallocation
- Make DelayedDestructionRing thread-safe with proper mutex protection
- Track consecutive high-memory frames to detect potential leaks
- Add an emergency cleanup mechanism for extreme memory pressure situations
- Use proper type casting in std::max to fix compilation errors

This should significantly improve stability during extended gameplay sessions with memory-intensive titles.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
Parent: e72d695115
Commit: ff9c61e7c7
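For readers skimming the diff below, here is a condensed sketch of the reworked garbage-collection tuning. It is not part of the commit: the struct and free function are illustrative, but the threshold values mirror the new Configure lambda in RunGarbageCollector(), and expected/critical correspond to the cache's existing expected_memory / critical_memory budgets.

#include <cstdint>

// Illustrative sketch of the new GC tuning; not project code.
struct GcSettings {
    bool high_priority_mode;        // usage at or above the expected budget
    bool aggressive_mode;           // at or above the critical budget (when allowed)
    std::uint64_t ticks_to_destroy; // LRU age cutoff: lower means more eager pruning
    int num_iterations;             // how many images one pass may delete
};

GcSettings Configure(std::uint64_t used, std::uint64_t expected, std::uint64_t critical,
                     bool allow_aggressive) {
    GcSettings s{};
    s.high_priority_mode = used >= expected;
    s.aggressive_mode = allow_aggressive && used >= critical;
    // New, more aggressive values from this commit (previously 10/25/50 and 40/20/10).
    s.ticks_to_destroy = s.aggressive_mode ? 5 : s.high_priority_mode ? 15 : 40;
    s.num_iterations = s.aggressive_mode ? 60 : (s.high_priority_mode ? 30 : 15);
    return s;
}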
src/video_core/delayed_destruction_ring.h

@@ -1,12 +1,15 @@
 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
 #include <array>
 #include <cstddef>
+#include <mutex>
 #include <utility>
 #include <vector>
+#include "common/logging/log.h"
 
 namespace VideoCommon {
 
@@ -14,18 +17,59 @@ namespace VideoCommon {
 template <typename T, size_t TICKS_TO_DESTROY>
 class DelayedDestructionRing {
 public:
+    DelayedDestructionRing() = default;
+    ~DelayedDestructionRing() {
+        // Ensure all resources are properly released when ring is destroyed
+        for (auto& element_list : elements) {
+            element_list.clear();
+        }
+    }
+
     void Tick() {
+        std::scoped_lock lock{ring_mutex};
+
+        // Move to next position in the ring
         index = (index + 1) % TICKS_TO_DESTROY;
-        elements[index].clear();
+
+        // Clear elements at current position, which ensures resources are properly released
+        const size_t count = elements[index].size();
+        if (count > 0) {
+            // If more than a threshold of elements are being destroyed at once, log it
+            if (count > 100) {
+                LOG_DEBUG(Render_Vulkan, "Destroying {} delayed objects", count);
+            }
+            elements[index].clear();
+        }
     }
 
     void Push(T&& object) {
+        std::scoped_lock lock{ring_mutex};
         elements[index].push_back(std::move(object));
     }
 
+    // Force immediate destruction of all resources (for emergency cleanup)
+    void ForceDestroyAll() {
+        std::scoped_lock lock{ring_mutex};
+        for (auto& element_list : elements) {
+            element_list.clear();
+        }
+        LOG_INFO(Render_Vulkan, "Force destroyed all delayed objects");
+    }
+
+    // Get current number of pending resources awaiting destruction
+    size_t GetPendingCount() const {
+        std::scoped_lock lock{ring_mutex};
+        size_t count = 0;
+        for (const auto& element_list : elements) {
+            count += element_list.size();
+        }
+        return count;
+    }
+
 private:
     size_t index = 0;
     std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
+    mutable std::mutex ring_mutex;
 };
 
 } // namespace VideoCommon
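The texture cache owns several of these rings (sentenced_images, sentenced_framebuffers, sentenced_image_view, seen in the hunks below). A minimal usage sketch follows, assuming a movable resource wrapper; FakeImage, TICKS, and the helper function names are illustrative, not project code.

// Minimal usage sketch of DelayedDestructionRing.
#include <cstddef>
#include <utility>

#include "video_core/delayed_destruction_ring.h"

struct FakeImage {
    // Owns a GPU handle; the destructor releases it.
};

constexpr std::size_t TICKS = 8; // illustrative; the real ring sizes live in the cache
VideoCommon::DelayedDestructionRing<FakeImage, TICKS> sentenced_images;

void QueueForDeletion(FakeImage&& image) {
    // Deferred path: the GPU may still be using the resource this frame.
    sentenced_images.Push(std::move(image));
}

void OnFrameEnd() {
    // Advances the ring and frees the bucket that is TICKS frames old.
    sentenced_images.Tick();
}

void OnExtremeMemoryPressure() {
    // Emergency path added by this commit: drop everything immediately.
    sentenced_images.ForceDestroyAll();
}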
src/video_core/texture_cache/texture_cache.h

@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
@@ -80,8 +81,10 @@ void TextureCache<P>::RunGarbageCollector() {
     const auto Configure = [&](bool allow_aggressive) {
         high_priority_mode = total_used_memory >= expected_memory;
         aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
-        ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
-        num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
+        // Reduce ticks_to_destroy to be more aggressive in freeing memory
+        ticks_to_destroy = aggressive_mode ? 5ULL : high_priority_mode ? 15ULL : 40ULL;
+        // Increase num_iterations to clean up more resources at once for memory-intensive games
+        num_iterations = aggressive_mode ? 60 : (high_priority_mode ? 30 : 15);
     };
     const auto Cleanup = [this, &num_iterations, &high_priority_mode,
                           &aggressive_mode](ImageId image_id) {
@@ -95,7 +98,8 @@ void TextureCache<P>::RunGarbageCollector() {
             // used by the async decoder thread.
            return false;
         }
-        if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
+        // Be more aggressive with cleanup for memory-intensive games
+        if (!aggressive_mode && !high_priority_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
            return false;
         }
         const bool must_download =
@@ -118,19 +122,20 @@ void TextureCache<P>::RunGarbageCollector() {
         DeleteImage(image_id, image.scale_tick > frame_tick + 5);
         if (total_used_memory < critical_memory) {
             if (aggressive_mode) {
-                // Sink the aggresiveness.
-                num_iterations >>= 2;
+                // Sink the aggresiveness more gradually to prevent oscillation
+                num_iterations = num_iterations * 3 / 4;
                 aggressive_mode = false;
                 return false;
             }
             if (high_priority_mode && total_used_memory < expected_memory) {
-                num_iterations >>= 1;
+                num_iterations = num_iterations * 3 / 4;
                 high_priority_mode = false;
             }
         }
         return false;
     };
 
+    // Run garbage collection more frequently for memory-intensive games
     // Try to remove anything old enough and not high priority.
     Configure(false);
     lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
@@ -138,19 +143,67 @@ void TextureCache<P>::RunGarbageCollector() {
     // If pressure is still too high, prune aggressively.
     if (total_used_memory >= critical_memory) {
         Configure(true);
-        lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
+        // Make a more thorough sweep with more aggressive settings
+        lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy / 2, Cleanup);
+
+        // If we're still in a critical memory situation, do emergency cleanup
+        if (total_used_memory >= critical_memory + 50_MiB) {
+            // Last resort emergency cleanup - reduce thresholds dramatically
+            ticks_to_destroy = 1;
+            num_iterations = 100;
+            lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
+        }
     }
 }
 
 template <class P>
 void TextureCache<P>::TickFrame() {
+    static u64 consecutive_high_memory_frames = 0;
+    static constexpr u64 EMERGENCY_CLEANUP_THRESHOLD = 120; // ~2 seconds at 60 FPS
+
     // If we can obtain the memory info, use it instead of the estimate.
     if (runtime.CanReportMemoryUsage()) {
         total_used_memory = runtime.GetDeviceMemoryUsage();
     }
+
+    // Track consecutive high memory frames to detect potential leaks
+    if (total_used_memory > critical_memory) {
+        consecutive_high_memory_frames++;
+        if (consecutive_high_memory_frames > EMERGENCY_CLEANUP_THRESHOLD) {
+            // Emergency situation - extreme memory pressure for extended time
+            // This likely indicates a leak or insufficient cleanup
+            LOG_WARNING(Render, "Emergency texture cache cleanup triggered after {} frames of high memory usage",
+                        consecutive_high_memory_frames);
+
+            // Force immediate cleanup of all pending resources
+            sentenced_images.ForceDestroyAll();
+            sentenced_framebuffers.ForceDestroyAll();
+            sentenced_image_view.ForceDestroyAll();
+
+            // Do a forced garbage collection pass
+            bool saved_value = has_deleted_images;
+            RunGarbageCollector();
+            has_deleted_images = saved_value;
+
+            // Reset counter but keep some pressure
+            consecutive_high_memory_frames = 30;
+        }
+        else if (consecutive_high_memory_frames > 60) { // If high memory for >60 frames (~1 second)
+            // Force a more aggressive cleanup cycle
+            RunGarbageCollector();
+            consecutive_high_memory_frames = 45; // Reset but keep some pressure
+        }
+    } else if (total_used_memory > expected_memory) {
+        // Use u64(1) to ensure type compatibility, avoiding the ULL suffix
+        consecutive_high_memory_frames = std::max(u64(1), consecutive_high_memory_frames / 2);
+    } else {
+        consecutive_high_memory_frames = 0;
+    }
+
     if (total_used_memory > minimum_memory) {
         RunGarbageCollector();
     }
+
     sentenced_images.Tick();
     sentenced_framebuffers.Tick();
     sentenced_image_view.Tick();
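On the std::max(u64(1), ...) line above: std::max deduces a single template type from both arguments, so mixing a 1ULL literal with a u64 variable can fail to compile on platforms where u64 aliases unsigned long rather than unsigned long long. A small standalone illustration of the fix (not project code; the alias here is an assumption about the project's common type definitions):

#include <algorithm>
#include <cstdint>

using u64 = std::uint64_t; // assumed to match the project's u64 alias

int main() {
    u64 frames = 7;
    // std::max(1ULL, frames / 2); // may not compile: deduces unsigned long long
    //                             // vs. unsigned long on LP64 platforms
    const u64 clamped = std::max(u64(1), frames / 2); // both operands are u64
    return static_cast<int>(clamped);
}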
@@ -2165,27 +2218,35 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     if (image.HasScaled()) {
         total_used_memory -= GetScaledImageSizeBytes(image);
     }
+
+    // Calculate accurate memory usage for this image
     u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
     if ((IsPixelFormatASTC(image.info.format) &&
          True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
         True(image.flags & ImageFlagBits::Converted)) {
         tentative_size = TranscodedAstcSize(tentative_size, image.info.format);
     }
+
+    // Ensure memory usage is properly accounted for
     total_used_memory -= Common::AlignUp(tentative_size, 1024);
+
     const GPUVAddr gpu_addr = image.gpu_addr;
     const auto alloc_it = image_allocs_table.find(gpu_addr);
     if (alloc_it == image_allocs_table.end()) {
-        ASSERT_MSG(false, "Trying to delete an image alloc that does not exist in address 0x{:x}",
+        LOG_ERROR(HW_GPU, "Trying to delete an image alloc that does not exist in address 0x{:x}",
                   gpu_addr);
         return;
     }
+
     const ImageAllocId alloc_id = alloc_it->second;
     std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
     const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
     if (alloc_image_it == alloc_images.end()) {
-        ASSERT_MSG(false, "Trying to delete an image that does not exist");
+        LOG_ERROR(HW_GPU, "Trying to delete an image that does not exist");
         return;
     }
+
+    // Ensure image is properly untracked and unregistered before deletion
     ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
@@ -2196,6 +2257,8 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     for (size_t rt = 0; rt < NUM_RT; ++rt) {
         dirty[Dirty::ColorBuffer0 + rt] = true;
     }
+
+    // Clear render target references
     const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
     for (const ImageViewId image_view_id : image_view_ids) {
         std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
@@ -2203,9 +2266,12 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
             render_targets.depth_buffer_id = ImageViewId{};
         }
     }
+
+    // Clean up references and dependencies
     RemoveImageViewReferences(image_view_ids);
     RemoveFramebuffers(image_view_ids);
+
+    // Handle aliased images
     for (const AliasedImage& alias : image.aliased_images) {
         ImageBase& other_image = slot_images[alias.id];
         [[maybe_unused]] const size_t num_removed_aliases =
@@ -2213,33 +2279,43 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
                 return other_alias.id == image_id;
             });
         other_image.CheckAliasState();
-        ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
-                   num_removed_aliases);
+        if (num_removed_aliases != 1) {
+            LOG_WARNING(HW_GPU, "Invalid number of removed aliases: {}", num_removed_aliases);
+        }
     }
+
+    // Handle overlapping images
     for (const ImageId overlap_id : image.overlapping_images) {
         ImageBase& other_image = slot_images[overlap_id];
         [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
             other_image.overlapping_images,
             [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
         other_image.CheckBadOverlapState();
-        ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
-                   num_removed_overlaps);
+        if (num_removed_overlaps != 1) {
+            LOG_WARNING(HW_GPU, "Invalid number of removed overlaps: {}", num_removed_overlaps);
+        }
     }
+
+    // Free resources - either immediately or queue for delayed destruction
     for (const ImageViewId image_view_id : image_view_ids) {
         if (!immediate_delete) {
             sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
         }
         slot_image_views.erase(image_view_id);
     }
+
     if (!immediate_delete) {
         sentenced_images.Push(std::move(slot_images[image_id]));
     }
     slot_images.erase(image_id);
 
+    // Clean up allocation table
     alloc_images.erase(alloc_image_it);
     if (alloc_images.empty()) {
         image_allocs_table.erase(alloc_it);
     }
+
+    // Mark tables as invalidated
     for (size_t c : active_channel_ids) {
         auto& channel_info = channel_storage[c];
         if constexpr (ENABLE_VALIDATION) {