use map for samplers

This commit is contained in:
lizzie 2026-04-26 01:37:48 +00:00
parent 990a32952e
commit 28f37cadb5
9 changed files with 165 additions and 251 deletions

View file

@ -20,10 +20,14 @@
namespace Common { namespace Common {
struct SlotId { struct SlotId {
static constexpr u32 TAGGED_MASK = 0x7fffffff;
static constexpr u32 TAGGED_VALUE = 0x80000000;
static constexpr u32 INVALID_INDEX = (std::numeric_limits<u32>::max)(); static constexpr u32 INVALID_INDEX = (std::numeric_limits<u32>::max)();
/// Returns the stored index with the tag bit (TAGGED_VALUE) cleared.
constexpr u32 Value() const noexcept {
    // TAGGED_MASK is the bitwise complement of TAGGED_VALUE, so this keeps the low 31 bits.
    return index & TAGGED_MASK;
}
constexpr auto operator<=>(const SlotId&) const noexcept = default; constexpr auto operator<=>(const SlotId&) const noexcept = default;
constexpr explicit operator bool() const noexcept { constexpr explicit operator bool() const noexcept {
return index != INVALID_INDEX; return index != INVALID_INDEX;
} }
@ -47,11 +51,11 @@ public:
Iterator& operator++() noexcept { Iterator& operator++() noexcept {
const u64* const bitset = slot_vector->stored_bitset.data(); const u64* const bitset = slot_vector->stored_bitset.data();
const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64; const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
if (id.index < size) { if (id.Value() < size) {
do { do {
++id.index; ++id.index;
} while (id.index < size && !IsValid(bitset)); } while (id.Value() < size && !IsValid(bitset));
if (id.index == size) { if (id.Value() == size) {
id = SlotId{}; id = SlotId{};
} }
} }
@ -85,7 +89,7 @@ public:
: slot_vector{slot_vector_}, id{id_} {} : slot_vector{slot_vector_}, id{id_} {}
bool IsValid(const u64* bitset) const noexcept { bool IsValid(const u64* bitset) const noexcept {
return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; return ((bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0;
} }
SlotVector<T>* slot_vector; SlotVector<T>* slot_vector;
@ -107,12 +111,12 @@ public:
[[nodiscard]] T& operator[](SlotId id) noexcept { [[nodiscard]] T& operator[](SlotId id) noexcept {
ValidateIndex(id); ValidateIndex(id);
return values[id.index].object; return values[id.Value()].object;
} }
[[nodiscard]] const T& operator[](SlotId id) const noexcept { [[nodiscard]] const T& operator[](SlotId id) const noexcept {
ValidateIndex(id); ValidateIndex(id);
return values[id.index].object; return values[id.Value()].object;
} }
template <typename... Args> template <typename... Args>
@ -125,9 +129,9 @@ public:
} }
void erase(SlotId id) noexcept { void erase(SlotId id) noexcept {
values[id.index].object.~T(); values[id.Value()].object.~T();
free_list.push_back(id.index); free_list.push_back(id.Value());
ResetStorageBit(id.index); ResetStorageBit(id.Value());
} }
[[nodiscard]] Iterator begin() noexcept { [[nodiscard]] Iterator begin() noexcept {
@ -175,8 +179,8 @@ private:
void ValidateIndex(SlotId id) const noexcept { void ValidateIndex(SlotId id) const noexcept {
DEBUG_ASSERT(id); DEBUG_ASSERT(id);
DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); DEBUG_ASSERT(id.Value() / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); DEBUG_ASSERT(((stored_bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0);
} }
[[nodiscard]] u32 FreeValueIndex() noexcept { [[nodiscard]] u32 FreeValueIndex() noexcept {
@ -208,9 +212,7 @@ private:
const size_t old_free_size = free_list.size(); const size_t old_free_size = free_list.size();
free_list.resize(old_free_size + (new_capacity - values_capacity)); free_list.resize(old_free_size + (new_capacity - values_capacity));
std::iota(free_list.begin() + old_free_size, free_list.end(), std::iota(free_list.begin() + old_free_size, free_list.end(), u32(values_capacity));
static_cast<u32>(values_capacity));
delete[] values; delete[] values;
values = new_values; values = new_values;
values_capacity = new_capacity; values_capacity = new_capacity;

View file

@ -90,7 +90,7 @@ void ComputePipeline::Configure() {
desc.is_written); desc.is_written);
++ssbo_index; ++ssbo_index;
} }
texture_cache.SynchronizeComputeDescriptors(); texture_cache.SynchronizeDescriptors(true);
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers; boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
@ -148,14 +148,14 @@ void ComputePipeline::Configure() {
const auto handle{read_handle(desc, index)}; const auto handle{read_handle(desc, index)};
views.push_back({handle.first}); views.push_back({handle.first});
VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true);
samplers.push_back(sampler); samplers.push_back(sampler);
} }
} }
for (const auto& desc : info.image_descriptors) { for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written); add_image(desc, desc.is_written);
} }
texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); texture_cache.FillImageViews(std::span(views.data(), views.size()), true);
if (!is_built) { if (!is_built) {
WaitForBuild(); WaitForBuild();

View file

@ -283,7 +283,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
size_t views_index{}; size_t views_index{};
size_t samplers_index{}; size_t samplers_index{};
texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.SynchronizeDescriptors(false);
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
@ -354,7 +354,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)}; const auto handle{read_handle(desc, index)};
views[views_index++] = {handle.first}; views[views_index++] = {handle.first};
VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)};
samplers[samplers_index++] = sampler; samplers[samplers_index++] = sampler;
} }
} }
@ -379,7 +379,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) { if constexpr (Spec::enabled_stages[4]) {
config_stage(4); config_stage(4);
} }
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index)); texture_cache.FillImageViews(std::span(views.data(), views_index), false, Spec::has_images);
texture_cache.UpdateRenderTargets(false); texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());

View file

@ -353,13 +353,13 @@ void RasterizerOpenGL::DrawTexture() {
gpu.TickWork(); gpu.TickWork();
}; };
texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.SynchronizeDescriptors(false);
texture_cache.UpdateRenderTargets(false); texture_cache.UpdateRenderTargets(false);
SyncState(); SyncState();
const auto& draw_texture_state = maxwell3d->draw_manager.draw_texture_state; const auto& draw_texture_state = maxwell3d->draw_manager.draw_texture_state;
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, true);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
const auto Scale = [&](auto dim) -> s32 { const auto Scale = [&](auto dim) -> s32 {

View file

@ -125,7 +125,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
++ssbo_index; ++ssbo_index;
} }
texture_cache.SynchronizeComputeDescriptors(); texture_cache.SynchronizeDescriptors(true);
boost::container::small_vector<VideoCommon::ImageViewInOut, 64> views; boost::container::small_vector<VideoCommon::ImageViewInOut, 64> views;
boost::container::small_vector<VideoCommon::SamplerId, 64> samplers; boost::container::small_vector<VideoCommon::SamplerId, 64> samplers;
@ -173,14 +173,14 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
const auto handle{read_handle(desc, index)}; const auto handle{read_handle(desc, index)};
views.push_back({handle.first}); views.push_back({handle.first});
VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true);
samplers.push_back(sampler); samplers.push_back(sampler);
} }
} }
for (const auto& desc : info.image_descriptors) { for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written); add_image(desc, desc.is_written);
} }
texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); texture_cache.FillImageViews(std::span(views.data(), views.size()), true);
buffer_cache.UnbindComputeTextureBuffers(); buffer_cache.UnbindComputeTextureBuffers();
size_t index{}; size_t index{};

View file

@ -319,7 +319,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
views.reserve(num_image_elements); views.reserve(num_image_elements);
samplers.reserve(num_textures); samplers.reserve(num_textures);
texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.SynchronizeDescriptors(false);
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
@ -384,7 +384,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)}; const auto handle{read_handle(desc, index)};
views.push_back({handle.first}); views.push_back({handle.first});
VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)};
samplers.push_back(sampler); samplers.push_back(sampler);
} }
} }
@ -413,7 +413,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
} }
ASSERT(views.size() == num_image_elements); ASSERT(views.size() == num_image_elements);
ASSERT(samplers.size() == num_textures); ASSERT(samplers.size() == num_textures);
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views.size())); texture_cache.FillImageViews(std::span(views.data(), views.size()), false, Spec::has_images);
VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {

View file

@ -351,7 +351,7 @@ void RasterizerVulkan::DrawTexture() {
FlushWork(); FlushWork();
std::scoped_lock l{texture_cache.mutex}; std::scoped_lock l{texture_cache.mutex};
texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.SynchronizeDescriptors(false);
texture_cache.UpdateRenderTargets(false); texture_cache.UpdateRenderTargets(false);
UpdateDynamicStates(); UpdateDynamicStates();
@ -359,7 +359,7 @@ void RasterizerVulkan::DrawTexture() {
query_cache.NotifySegment(true); query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable);
const auto& draw_texture_state = maxwell3d->draw_manager.draw_texture_state; const auto& draw_texture_state = maxwell3d->draw_manager.draw_texture_state;
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, true);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
const auto* framebuffer = texture_cache.GetFramebuffer(); const auto* framebuffer = texture_cache.GetFramebuffer();

View file

@ -14,6 +14,7 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/settings.h" #include "common/settings.h"
#include "common/slot_vector.h"
#include "video_core/control/channel_state.h" #include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h" #include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_compute.h"
@ -204,7 +205,8 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
template <class P> template <class P>
typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept { typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index); // Not compute!
const auto image_view_id = VisitImageView(index, false);
return slot_image_views[image_view_id]; return slot_image_views[image_view_id];
} }
@ -214,14 +216,25 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
} }
template <class P> template <class P>
template <bool has_blacklists> void TextureCache<P>::FillImageViews(std::span<ImageViewInOut> views, bool compute, bool blacklist) {
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { bool has_blacklisted = false;
FillImageViews<has_blacklists>(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views); do {
} has_deleted_images = false;
if (blacklist) {
template <class P> has_blacklisted = false;
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { }
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids, views); for (ImageViewInOut& view : views) {
view.id = VisitImageView(view.index, compute);
if (blacklist) {
if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
const ImageViewBase& image_view = slot_image_views[view.id];
auto& image = slot_images[image_view.image_id];
has_blacklisted |= ScaleDown(image);
image.scale_rating = 0;
}
}
}
} while (has_deleted_images || (blacklist && has_blacklisted));
} }
template <class P> template <class P>
@ -289,39 +302,24 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
} }
template <class P> template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { typename P::Sampler* TextureCache<P>::GetSampler(u32 index, bool compute) {
return &slot_samplers[GetGraphicsSamplerId(index)]; return &slot_samplers[GetSamplerId(index, compute)];
} }
template <class P> template <class P>
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { SamplerId TextureCache<P>::GetSamplerId(u32 index, bool compute) {
return &slot_samplers[GetComputeSamplerId(index)]; auto& table = compute ? channel_state->compute_sampler_table : channel_state->graphics_sampler_table;
} if (index > table.current_limit) {
template <class P>
SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
if (index > channel_state->graphics_sampler_table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return NULL_SAMPLER_ID; return NULL_SAMPLER_ID;
} }
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index); auto const [descriptor, is_new] = table.Read(*gpu_memory, index);
SamplerId& id = channel_state->graphics_sampler_ids[index]; if (is_new) {
if (is_new) auto const id = FindSampler(descriptor, compute);
id = FindSampler(descriptor); channel_state->sampler_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), id);
return id; return id;
}
template <class P>
SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
if (index > channel_state->compute_sampler_table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return NULL_SAMPLER_ID;
} }
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index); return channel_state->sampler_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0))->second;
SamplerId& id = channel_state->compute_sampler_ids[index];
if (is_new)
id = FindSampler(descriptor);
return id;
} }
template <class P> template <class P>
@ -335,40 +333,31 @@ typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {
} }
template <class P> template <class P>
void TextureCache<P>::SynchronizeGraphicsDescriptors() { void TextureCache<P>::SynchronizeDescriptors(bool compute) {
const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding; if (compute) {
const u32 tic_limit = maxwell3d->regs.tex_header.limit; const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit; const u32 tic_limit = kepler_compute->regs.tic.limit;
bool bindings_changed = false; const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) { bool bindings_changed = false;
channel_state->graphics_sampler_ids.resize(tsc_limit + 1); if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit))
bindings_changed = true; bindings_changed = true;
} if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit))
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) { bindings_changed = true;
channel_state->graphics_image_view_ids.resize(tic_limit + 1); if (bindings_changed) {
bindings_changed = true; ++texture_bindings_serial;
} }
if (bindings_changed) { } else {
++texture_bindings_serial; const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
} const u32 tic_limit = maxwell3d->regs.tex_header.limit;
} const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
bool bindings_changed = false;
template <class P> if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit))
void TextureCache<P>::SynchronizeComputeDescriptors() { bindings_changed = true;
const bool linked_tsc = kepler_compute->launch_description.linked_tsc; if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit))
const u32 tic_limit = kepler_compute->regs.tic.limit; bindings_changed = true;
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; if (bindings_changed) {
bool bindings_changed = false; ++texture_bindings_serial;
if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) { }
channel_state->compute_sampler_ids.resize(tsc_limit + 1);
bindings_changed = true;
}
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) {
channel_state->compute_image_view_ids.resize(tic_limit + 1);
bindings_changed = true;
}
if (bindings_changed) {
++texture_bindings_serial;
} }
} }
@ -547,45 +536,30 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
} }
template <class P> template <class P>
template <bool has_blacklists> ImageViewId TextureCache<P>::VisitImageView(u32 index, bool compute) {
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, auto& table = compute ? channel_state->compute_image_table : channel_state->graphics_image_table;
std::span<ImageViewId> cached_image_view_ids,
std::span<ImageViewInOut> views) {
bool has_blacklisted = false;
do {
has_deleted_images = false;
if constexpr (has_blacklists) {
has_blacklisted = false;
}
for (ImageViewInOut& view : views) {
view.id = VisitImageView(table, cached_image_view_ids, view.index);
if constexpr (has_blacklists) {
if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
const ImageViewBase& image_view{slot_image_views[view.id]};
auto& image = slot_images[image_view.image_id];
has_blacklisted |= ScaleDown(image);
image.scale_rating = 0;
}
}
}
} while (has_deleted_images || (has_blacklists && has_blacklisted));
}
template <class P>
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) {
if (index > table.current_limit) { if (index > table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
return NULL_IMAGE_VIEW_ID; return NULL_IMAGE_VIEW_ID;
} }
const auto [descriptor, is_new] = table.Read(*gpu_memory, index); // Is new (on the tegra engine side)?
ImageViewId& image_view_id = cached_image_view_ids[index]; auto const [descriptor, is_new] = table.Read(*gpu_memory, index);
if (is_new) { if (is_new) {
image_view_id = FindImageView(descriptor); if (IsValidEntry(*gpu_memory, descriptor)) {
// Is new (registered view) on the texture cache side?
const auto [pair, is_new_tc] = channel_state->image_views.try_emplace(descriptor);
if (is_new_tc)
pair->second = CreateImageView(descriptor);
PrepareImageView(pair->second, false, false);
channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), pair->second);
return pair->second;
}
channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), NULL_IMAGE_VIEW_ID);
return NULL_IMAGE_VIEW_ID;
} }
if (image_view_id != NULL_IMAGE_VIEW_ID) { auto const it = channel_state->image_view_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0));
PrepareImageView(image_view_id, false, false); PrepareImageView(it->second, false, false);
} return it->second;
return image_view_id;
} }
template <class P> template <class P>
@ -1184,19 +1158,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
} }
} }
template <class P>
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
if (!IsValidEntry(*gpu_memory, config)) {
return NULL_IMAGE_VIEW_ID;
}
const auto [pair, is_new] = channel_state->image_views.try_emplace(config);
ImageViewId& image_view_id = pair->second;
if (is_new) {
image_view_id = CreateImageView(config);
}
return image_view_id;
}
template <class P> template <class P>
ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
const ImageInfo info(config); const ImageInfo info(config);
@ -1338,10 +1299,10 @@ void TextureCache<P>::InvalidateScale(Image& image) {
image.image_view_infos.clear(); image.image_view_infos.clear();
for (size_t c : active_channel_ids) { for (size_t c : active_channel_ids) {
auto& channel_info = channel_storage[c]; auto& channel_info = channel_storage[c];
if constexpr (ENABLE_VALIDATION) {
std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); if constexpr (ENABLE_VALIDATION)
std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); for (auto& e : channel_info.image_view_ids)
} e.second = CORRUPT_ID;
channel_info.graphics_image_table.Invalidate(); channel_info.graphics_image_table.Invalidate();
channel_info.compute_image_table.Invalidate(); channel_info.compute_image_table.Invalidate();
} }
@ -1906,7 +1867,7 @@ std::pair<u32, u32> TextureCache<P>::PrepareDmaImage(ImageId dst_id, GPUVAddr ba
} }
template <class P> template <class P>
SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { SamplerId TextureCache<P>::FindSampler(const TSCEntry& config, bool compute) {
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
return NULL_SAMPLER_ID; return NULL_SAMPLER_ID;
} }
@ -1929,68 +1890,48 @@ std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
template <class P> template <class P>
void TextureCache<P>::EnforceSamplerBudget() { void TextureCache<P>::EnforceSamplerBudget() {
const auto budget = QuerySamplerBudget(); if (auto const budget = QuerySamplerBudget(); budget) {
if (!budget) { if (slot_samplers.size() < *budget) {
return; return;
}
if (!channel_state) {
return;
}
if (last_sampler_gc_frame == frame_tick) {
return;
}
last_sampler_gc_frame = frame_tick;
TrimInactiveSamplers(*budget);
} }
if (slot_samplers.size() < *budget) {
return;
}
if (!channel_state) {
return;
}
if (last_sampler_gc_frame == frame_tick) {
return;
}
last_sampler_gc_frame = frame_tick;
TrimInactiveSamplers(*budget);
} }
template <class P> template <class P>
void TextureCache<P>::TrimInactiveSamplers(size_t budget) { void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
if (channel_state->samplers.empty()) { if (channel_state->samplers.size() > 0) {
return; constexpr size_t SAMPLER_GC_SLACK = 1024;
} ankerl::unordered_dense::set<SamplerId> active_sampler_ids;
constexpr size_t SAMPLER_GC_SLACK = 1024; for (auto const& e : channel_state->sampler_ids)
auto mark_active = [](auto& set, SamplerId id) { active_sampler_ids.insert(e.second);
if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) { // Elements in the map must necessarily be valid
return; size_t removed = 0;
for (auto it = channel_state->samplers.begin(); it != channel_state->samplers.end();) {
const SamplerId sampler_id = it->second;
if (!sampler_id || sampler_id == CORRUPT_ID) {
it = channel_state->samplers.erase(it);
} else if (std::ranges::find(active_sampler_ids, sampler_id) != active_sampler_ids.end()) {
++it;
} else {
slot_samplers.erase(sampler_id);
it = channel_state->samplers.erase(it);
++removed;
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
break;
}
}
} }
set.insert(id); if (removed != 0) {
}; LOG_WARNING(HW_GPU, "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers", budget, removed);
ankerl::unordered_dense::set<SamplerId> active;
active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size());
for (const SamplerId id : channel_state->graphics_sampler_ids) {
mark_active(active, id);
}
for (const SamplerId id : channel_state->compute_sampler_ids) {
mark_active(active, id);
}
size_t removed = 0;
auto& sampler_map = channel_state->samplers;
for (auto it = sampler_map.begin(); it != sampler_map.end();) {
const SamplerId sampler_id = it->second;
if (!sampler_id || sampler_id == CORRUPT_ID) {
it = sampler_map.erase(it);
continue;
} }
if (active.find(sampler_id) != active.end()) {
++it;
continue;
}
slot_samplers.erase(sampler_id);
it = sampler_map.erase(it);
++removed;
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
break;
}
}
if (removed != 0) {
LOG_WARNING(HW_GPU,
"Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
budget, removed);
} }
} }
@ -2230,8 +2171,7 @@ ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const Imag
if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
return image_view_id; return image_view_id;
} }
const ImageViewId image_view_id = const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image, slot_images);
slot_image_views.insert(runtime, info, image_id, image, slot_images);
image.InsertView(info, image_view_id); image.InsertView(info, image_view_id);
return image_view_id; return image_view_id;
} }
@ -2491,10 +2431,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
} }
for (size_t c : active_channel_ids) { for (size_t c : active_channel_ids) {
auto& channel_info = channel_storage[c]; auto& channel_info = channel_storage[c];
if constexpr (ENABLE_VALIDATION) { if constexpr (ENABLE_VALIDATION)
std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); for (auto& e : channel_info.image_view_ids)
std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); e.second = CORRUPT_ID;
}
channel_info.graphics_image_table.Invalidate(); channel_info.graphics_image_table.Invalidate();
channel_info.compute_image_table.Invalidate(); channel_info.compute_image_table.Invalidate();
} }

View file

@ -86,12 +86,8 @@ public:
std::unordered_map<TICEntry, ImageViewId> image_views; std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers; std::unordered_map<TSCEntry, SamplerId> samplers;
// Values tuned for Mario Brothership, see also descriptor_table.h ankerl::unordered_dense::map<u32, SamplerId> sampler_ids;
// Change values as required. ankerl::unordered_dense::map<u32, ImageViewId> image_view_ids;
boost::container::static_vector<SamplerId, 0x1000 + 1> graphics_sampler_ids;
boost::container::static_vector<SamplerId, 0x1000 + 1> compute_sampler_ids;
boost::container::static_vector<ImageViewId, 0x80000 + 1> graphics_image_view_ids;
boost::container::static_vector<ImageViewId, 0x80000 + 1> compute_image_view_ids;
TextureCacheGPUMap* gpu_page_table = nullptr; TextureCacheGPUMap* gpu_page_table = nullptr;
TextureCacheGPUMap* sparse_page_table = nullptr; TextureCacheGPUMap* sparse_page_table = nullptr;
@ -170,27 +166,17 @@ public:
/// Mark an image as modified from the GPU /// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept; void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices /// Fill image_view_ids with the graphics/compute images in indices
template <bool has_blacklists> void FillImageViews(std::span<ImageViewInOut> views, bool compute, bool blacklist = true);
void FillGraphicsImageViews(std::span<ImageViewInOut> views);
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<ImageViewInOut> views);
/// Handle feedback loops during draws. /// Handle feedback loops during draws.
void CheckFeedbackLoop(std::span<const ImageViewInOut> views); void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
/// Get the sampler from the graphics descriptor table in the specified index /// Get the sampler from the graphics/compute descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index); Sampler* GetSampler(u32 index, bool compute);
/// Get the sampler from the compute descriptor table in the specified index /// Get the sampler id from the graphics/compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index); SamplerId GetSamplerId(u32 index, bool compute);
/// Get the sampler id from the graphics descriptor table in the specified index
SamplerId GetGraphicsSamplerId(u32 index);
/// Get the sampler id from the compute descriptor table in the specified index
SamplerId GetComputeSamplerId(u32 index);
/// Return a constant reference to the given sampler id /// Return a constant reference to the given sampler id
[[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept; [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept;
@ -198,11 +184,8 @@ public:
/// Return a reference to the given sampler id /// Return a reference to the given sampler id
[[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept; [[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
/// Refresh the state for graphics image view and sampler descriptors /// Refresh the state for graphics/compute image view and sampler descriptors
void SynchronizeGraphicsDescriptors(); void SynchronizeDescriptors(bool compute);
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
/// Updates the Render Targets if they can be rescaled /// Updates the Render Targets if they can be rescaled
/// @retval True if the Render Targets have been rescaled. /// @retval True if the Render Targets have been rescaled.
@ -313,15 +296,8 @@ private:
/// Runs the Garbage Collector. /// Runs the Garbage Collector.
void RunGarbageCollector(); void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
template <bool has_blacklists>
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
std::span<ImageViewInOut> views);
/// Find or create an image view in the guest descriptor table /// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, ImageViewId VisitImageView(u32 index, bool compute);
std::span<ImageViewId> cached_image_view_ids, u32 index);
/// Find or create a framebuffer with the given render target parameters /// Find or create a framebuffer with the given render target parameters
FramebufferId GetFramebufferId(const RenderTargets& key); FramebufferId GetFramebufferId(const RenderTargets& key);
@ -333,9 +309,6 @@ private:
template <typename StagingBuffer> template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer); void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
/// Create a new image view from a guest descriptor /// Create a new image view from a guest descriptor
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
@ -363,7 +336,7 @@ private:
const Tegra::Engines::Fermi2D::Config& copy); const Tegra::Engines::Fermi2D::Config& copy);
/// Find or create a sampler from a guest descriptor sampler /// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config); [[nodiscard]] SamplerId FindSampler(const TSCEntry& config, bool compute);
/// Find or create an image view for the given color buffer index /// Find or create an image view for the given color buffer index
[[nodiscard]] ImageViewId FindColorBuffer(size_t index); [[nodiscard]] ImageViewId FindColorBuffer(size_t index);