mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-16 23:27:00 +02:00
[vk, qcom] Samplers Budget Management
This commit is contained in:
parent
f50348d483
commit
5ed257a238
6 changed files with 112 additions and 0 deletions
|
|
@ -1527,6 +1527,10 @@ bool TextureCacheRuntime::CanReportMemoryUsage() const {
|
||||||
return device.CanReportMemoryUsage();
|
return device.CanReportMemoryUsage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<size_t> TextureCacheRuntime::GetSamplerHeapBudget() const {
|
||||||
|
return device.GetSamplerHeapBudget();
|
||||||
|
}
|
||||||
|
|
||||||
void TextureCacheRuntime::TickFrame() {}
|
void TextureCacheRuntime::TickFrame() {}
|
||||||
|
|
||||||
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
|
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,8 @@ public:
|
||||||
|
|
||||||
bool CanReportMemoryUsage() const;
|
bool CanReportMemoryUsage() const;
|
||||||
|
|
||||||
|
/// Returns the sampler budget reported by the device, forwarded without modification.
std::optional<size_t> GetSamplerHeapBudget() const;
|
||||||
|
|
||||||
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
|
||||||
const Region2D& dst_region, const Region2D& src_region,
|
const Region2D& dst_region, const Region2D& src_region,
|
||||||
Tegra::Engines::Fermi2D::Filter filter,
|
Tegra::Engines::Fermi2D::Filter filter,
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
#include <optional>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
|
|
||||||
|
|
@ -1736,11 +1738,89 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
|
||||||
}
|
}
|
||||||
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
|
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
|
||||||
if (is_new) {
|
if (is_new) {
|
||||||
|
EnforceSamplerBudget();
|
||||||
pair->second = slot_samplers.insert(runtime, config);
|
pair->second = slot_samplers.insert(runtime, config);
|
||||||
}
|
}
|
||||||
return pair->second;
|
return pair->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
|
||||||
|
if constexpr (requires { runtime.GetSamplerHeapBudget(); }) {
|
||||||
|
return runtime.GetSamplerHeapBudget();
|
||||||
|
} else {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::EnforceSamplerBudget() {
|
||||||
|
const auto budget = QuerySamplerBudget();
|
||||||
|
if (!budget) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (slot_samplers.size() < *budget) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!channel_state) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (last_sampler_gc_frame == frame_tick) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
last_sampler_gc_frame = frame_tick;
|
||||||
|
TrimInactiveSamplers(*budget);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
|
||||||
|
if (channel_state->samplers.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
static constexpr size_t SAMPLER_GC_SLACK = 1024;
|
||||||
|
auto mark_active = [](auto& set, SamplerId id) {
|
||||||
|
if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
set.insert(id);
|
||||||
|
};
|
||||||
|
std::unordered_set<SamplerId> active;
|
||||||
|
active.reserve(channel_state->graphics_sampler_ids.size() +
|
||||||
|
channel_state->compute_sampler_ids.size());
|
||||||
|
for (const SamplerId id : channel_state->graphics_sampler_ids) {
|
||||||
|
mark_active(active, id);
|
||||||
|
}
|
||||||
|
for (const SamplerId id : channel_state->compute_sampler_ids) {
|
||||||
|
mark_active(active, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t removed = 0;
|
||||||
|
auto& sampler_map = channel_state->samplers;
|
||||||
|
for (auto it = sampler_map.begin(); it != sampler_map.end();) {
|
||||||
|
const SamplerId sampler_id = it->second;
|
||||||
|
if (!sampler_id || sampler_id == CORRUPT_ID) {
|
||||||
|
it = sampler_map.erase(it);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (active.find(sampler_id) != active.end()) {
|
||||||
|
++it;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
slot_samplers.erase(sampler_id);
|
||||||
|
it = sampler_map.erase(it);
|
||||||
|
++removed;
|
||||||
|
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (removed != 0) {
|
||||||
|
LOG_WARNING(HW_GPU,
|
||||||
|
"Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
|
||||||
|
budget, removed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
|
ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
|
||||||
const auto& regs = maxwell3d->regs;
|
const auto& regs = maxwell3d->regs;
|
||||||
|
|
|
||||||
|
|
@ -429,6 +429,9 @@ private:
|
||||||
|
|
||||||
void QueueAsyncDecode(Image& image, ImageId image_id);
|
void QueueAsyncDecode(Image& image, ImageId image_id);
|
||||||
void TickAsyncDecode();
|
void TickAsyncDecode();
|
||||||
|
/// Trims inactive samplers, at most once per frame tick, when the sampler count reaches the budget.
void EnforceSamplerBudget();
|
||||||
|
/// Erases samplers of the current channel that are absent from its graphics/compute sampler id tables.
void TrimInactiveSamplers(size_t budget);
|
||||||
|
/// Returns runtime.GetSamplerHeapBudget() when the runtime provides it, std::nullopt otherwise.
std::optional<size_t> QuerySamplerBudget() const;
|
||||||
|
|
||||||
Runtime& runtime;
|
Runtime& runtime;
|
||||||
|
|
||||||
|
|
@ -500,6 +503,7 @@ private:
|
||||||
|
|
||||||
u64 modification_tick = 0;
|
u64 modification_tick = 0;
|
||||||
u64 frame_tick = 0;
|
u64 frame_tick = 0;
|
||||||
|
/// frame_tick value recorded when the sampler trim last ran; starts at the u64 maximum so it
/// does not match the initial frame_tick of 0.
u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();
|
||||||
|
|
||||||
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
|
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
|
||||||
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
|
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
|
||||||
|
|
|
||||||
|
|
@ -606,6 +606,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
version == VK_MAKE_API_VERSION(0, 512, 800, 51)) {
|
version == VK_MAKE_API_VERSION(0, 512, 800, 51)) {
|
||||||
has_broken_parallel_compiling = true;
|
has_broken_parallel_compiling = true;
|
||||||
}
|
}
|
||||||
|
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
|
||||||
|
if (sampler_limit > 0) {
|
||||||
|
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
|
||||||
|
const size_t derived_budget =
|
||||||
|
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit / 4U);
|
||||||
|
sampler_heap_budget = derived_budget;
|
||||||
|
LOG_WARNING(Render_Vulkan,
|
||||||
|
"Qualcomm driver reports max {} samplers; clamping cache to {} (25%) to "
|
||||||
|
"avoid heap exhaustion",
|
||||||
|
sampler_limit, sampler_heap_budget);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (extensions.sampler_filter_minmax && is_amd) {
|
if (extensions.sampler_filter_minmax && is_amd) {
|
||||||
|
|
@ -1542,6 +1553,13 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<size_t> Device::GetSamplerHeapBudget() const {
|
||||||
|
if (sampler_heap_budget == 0) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return sampler_heap_budget;
|
||||||
|
}
|
||||||
|
|
||||||
u64 Device::GetDeviceMemoryUsage() const {
|
u64 Device::GetDeviceMemoryUsage() const {
|
||||||
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
|
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
|
||||||
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
|
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
@ -759,6 +760,8 @@ public:
|
||||||
return has_broken_parallel_compiling;
|
return has_broken_parallel_compiling;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the sampler budget, or std::nullopt when sampler_heap_budget is 0.
std::optional<size_t> GetSamplerHeapBudget() const;
|
||||||
|
|
||||||
/// Returns the vendor name reported from Vulkan.
|
/// Returns the vendor name reported from Vulkan.
|
||||||
std::string_view GetVendorName() const {
|
std::string_view GetVendorName() const {
|
||||||
return properties.driver.driverName;
|
return properties.driver.driverName;
|
||||||
|
|
@ -1055,6 +1058,7 @@ private:
|
||||||
bool dynamic_state3_alpha_to_coverage{};
|
bool dynamic_state3_alpha_to_coverage{};
|
||||||
bool dynamic_state3_alpha_to_one{};
|
bool dynamic_state3_alpha_to_one{};
|
||||||
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
|
||||||
|
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
|
||||||
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||||
u32 sets_per_pool{}; ///< Sets per Description Pool
|
u32 sets_per_pool{}; ///< Sets per Description Pool
|
||||||
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
|
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue