[video_core] fix redundant resize-copy overload and just use default-init resize, to reduce stutter on Mario BP

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-04-22 08:56:48 +00:00 committed by crueter
parent e875a3196b
commit d5ceb28336
7 changed files with 80 additions and 104 deletions

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@ -52,7 +52,7 @@ public:
++id.index; ++id.index;
} while (id.index < size && !IsValid(bitset)); } while (id.index < size && !IsValid(bitset));
if (id.index == size) { if (id.index == size) {
id.index = SlotId::INVALID_INDEX; id = SlotId{};
} }
} }
return *this; return *this;
@ -141,7 +141,7 @@ public:
} }
[[nodiscard]] Iterator end() noexcept { [[nodiscard]] Iterator end() noexcept {
return Iterator(this, SlotId{SlotId::INVALID_INDEX}); return Iterator(this, SlotId{});
} }
[[nodiscard]] size_t size() const noexcept { [[nodiscard]] size_t size() const noexcept {

View file

@ -486,27 +486,17 @@ void TouchResource::ReadTouchInput() {
SanitizeInput(current_touch_state); SanitizeInput(current_touch_state);
std::scoped_lock lock{*input_mutex}; std::scoped_lock lock{*input_mutex};
if (current_touch_state.entry_count == previous_touch_state.entry_count) { if (current_touch_state.entry_count == previous_touch_state.entry_count && current_touch_state.entry_count >= 1) {
if (current_touch_state.entry_count < 1) {
return;
}
bool has_moved = false; bool has_moved = false;
for (std::size_t i = 0; i < static_cast<std::size_t>(current_touch_state.entry_count); for (std::size_t i = 0; !has_moved && i < std::size_t(current_touch_state.entry_count); i++) {
i++) { s32 delta_x = std::abs(s32(current_touch_state.states[i].position.x) - s32(previous_touch_state.states[i].position.x));
s32 delta_x = std::abs(static_cast<s32>(current_touch_state.states[i].position.x) - s32 delta_y = std::abs(s32(current_touch_state.states[i].position.y) - s32(previous_touch_state.states[i].position.y));
static_cast<s32>(previous_touch_state.states[i].position.x)); has_moved |= (delta_x > 1 || delta_y > 1);
s32 delta_y = std::abs(static_cast<s32>(current_touch_state.states[i].position.y) -
static_cast<s32>(previous_touch_state.states[i].position.y));
if (delta_x > 1 || delta_y > 1) {
has_moved = true;
}
} }
if (!has_moved) { if (has_moved) {
return; input_event->Signal();
} }
} }
input_event->Signal();
} }
void TouchResource::OnTouchUpdate(s64 timestamp) { void TouchResource::OnTouchUpdate(s64 timestamp) {

View file

@ -314,8 +314,8 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
template <typename Spec> template <typename Spec>
bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views; boost::container::small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views;
small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers; boost::container::small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers;
views.reserve(num_image_elements); views.reserve(num_image_elements);
samplers.reserve(num_textures); samplers.reserve(num_textures);

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -6,37 +9,39 @@
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#include "common/alignment.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/div_ceil.h" #include "common/div_ceil.h"
#include "common/assert.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
namespace VideoCommon { namespace VideoCommon {
template <typename Descriptor> template <typename T>
class DescriptorTable { class DescriptorTable {
public: public:
explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) noexcept {
bool ret = !(current_gpu_addr == gpu_addr && current_limit == limit);
[[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) { if (ret) {
[[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; } Refresh(gpu_addr, limit);
Refresh(gpu_addr, limit); }
return true; return ret;
} }
void Invalidate() noexcept { void Invalidate() noexcept {
std::ranges::fill(read_descriptors, 0); std::ranges::fill(read_descriptors, 0);
} }
[[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) { [[nodiscard]] std::pair<T, bool> Read(Tegra::MemoryManager const& gpu_memory, u32 index) noexcept {
DEBUG_ASSERT(index <= current_limit); DEBUG_ASSERT(index <= current_limit);
const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(T);
std::pair<Descriptor, bool> result; std::pair<T, bool> result;
gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); gpu_memory.ReadBlockUnsafe(gpu_addr, std::addressof(result.first), sizeof(T));
if (IsDescriptorRead(index)) { if ((read_descriptors[index / 64] & (1ULL << (index % 64))) != 0) {
result.second = result.first != descriptors[index]; result.second = result.first != descriptors[index];
} else { } else {
MarkDescriptorAsRead(index); read_descriptors[index / 64] |= 1ULL << (index % 64);
result.second = true; result.second = true;
} }
if (result.second) { if (result.second) {
@ -45,34 +50,24 @@ public:
return result; return result;
} }
[[nodiscard]] u32 Limit() const noexcept { void Refresh(GPUVAddr gpu_addr, u32 limit) noexcept {
return current_limit;
}
private:
void Refresh(GPUVAddr gpu_addr, u32 limit) {
current_gpu_addr = gpu_addr; current_gpu_addr = gpu_addr;
current_limit = limit; current_limit = limit;
// Mario Brothership reallocates many times, so use aggressive pre-alloc sizes
const size_t num_descriptors = static_cast<size_t>(limit) + 1; // std::vector<T> by default uses geometric growth, but even that isn't enough to satisfy Brothership
read_descriptors.clear(); const size_t num_descriptors = ((limit + 0x80000) & (~0x7ffff)) + 1;
read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); size_t old_size = read_descriptors.size();
read_descriptors.resize(Common::DivCeil(num_descriptors, 64U));
old_size = (std::min)(old_size, read_descriptors.size());
std::fill(read_descriptors.begin(), read_descriptors.begin() + old_size, 0);
//
descriptors.resize(num_descriptors); descriptors.resize(num_descriptors);
} }
void MarkDescriptorAsRead(u32 index) noexcept { std::vector<u64> read_descriptors;
read_descriptors[index / 64] |= 1ULL << (index % 64); std::vector<T> descriptors;
}
[[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
}
Tegra::MemoryManager& gpu_memory;
GPUVAddr current_gpu_addr{}; GPUVAddr current_gpu_addr{};
u32 current_limit{}; u32 current_limit{};
std::vector<u64> read_descriptors;
std::vector<Descriptor> descriptors;
}; };
} // namespace VideoCommon } // namespace VideoCommon

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project // SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
@ -7,8 +10,8 @@
namespace VideoCommon { namespace VideoCommon {
TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept
: ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory}, : ChannelInfo(state)
compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {} {}
template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>; template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>;

View file

@ -204,8 +204,7 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
template <class P> template <class P>
typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept { typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
const auto image_view_id = VisitImageView(channel_state->graphics_image_table, const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index);
channel_state->graphics_image_view_ids, index);
return slot_image_views[image_view_id]; return slot_image_views[image_view_id];
} }
@ -217,14 +216,12 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
template <class P> template <class P>
template <bool has_blacklists> template <bool has_blacklists>
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
FillImageViews<has_blacklists>(channel_state->graphics_image_table, FillImageViews<has_blacklists>(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views);
channel_state->graphics_image_view_ids, views);
} }
template <class P> template <class P>
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids, FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids, views);
views);
} }
template <class P> template <class P>
@ -303,29 +300,27 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
template <class P> template <class P>
SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) { SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
if (index > channel_state->graphics_sampler_table.Limit()) { if (index > channel_state->graphics_sampler_table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return NULL_SAMPLER_ID; return NULL_SAMPLER_ID;
} }
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index);
SamplerId& id = channel_state->graphics_sampler_ids[index]; SamplerId& id = channel_state->graphics_sampler_ids[index];
if (is_new) { if (is_new)
id = FindSampler(descriptor); id = FindSampler(descriptor);
}
return id; return id;
} }
template <class P> template <class P>
SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) { SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
if (index > channel_state->compute_sampler_table.Limit()) { if (index > channel_state->compute_sampler_table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return NULL_SAMPLER_ID; return NULL_SAMPLER_ID;
} }
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index);
SamplerId& id = channel_state->compute_sampler_ids[index]; SamplerId& id = channel_state->compute_sampler_ids[index];
if (is_new) { if (is_new)
id = FindSampler(descriptor); id = FindSampler(descriptor);
}
return id; return id;
} }
@ -341,19 +336,16 @@ typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {
template <class P> template <class P>
void TextureCache<P>::SynchronizeGraphicsDescriptors() { void TextureCache<P>::SynchronizeGraphicsDescriptors() {
using SamplerBinding = Tegra::Engines::Maxwell3D::Regs::SamplerBinding; const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding;
const u32 tic_limit = maxwell3d->regs.tex_header.limit; const u32 tic_limit = maxwell3d->regs.tex_header.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit; const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
bool bindings_changed = false; bool bindings_changed = false;
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) {
tsc_limit)) { channel_state->graphics_sampler_ids.resize(tsc_limit + 1);
channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
bindings_changed = true; bindings_changed = true;
} }
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) {
tic_limit)) { channel_state->graphics_image_view_ids.resize(tic_limit + 1);
channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
bindings_changed = true; bindings_changed = true;
} }
if (bindings_changed) { if (bindings_changed) {
@ -366,15 +358,13 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
const bool linked_tsc = kepler_compute->launch_description.linked_tsc; const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
const u32 tic_limit = kepler_compute->regs.tic.limit; const u32 tic_limit = kepler_compute->regs.tic.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
bool bindings_changed = false; bool bindings_changed = false;
if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) { if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) {
channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); channel_state->compute_sampler_ids.resize(tsc_limit + 1);
bindings_changed = true; bindings_changed = true;
} }
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) {
tic_limit)) { channel_state->compute_image_view_ids.resize(tic_limit + 1);
channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
bindings_changed = true; bindings_changed = true;
} }
if (bindings_changed) { if (bindings_changed) {
@ -582,14 +572,12 @@ void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
} }
template <class P> template <class P>
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) {
std::span<ImageViewId> cached_image_view_ids, if (index > table.current_limit) {
u32 index) {
if (index > table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
return NULL_IMAGE_VIEW_ID; return NULL_IMAGE_VIEW_ID;
} }
const auto [descriptor, is_new] = table.Read(index); const auto [descriptor, is_new] = table.Read(*gpu_memory, index);
ImageViewId& image_view_id = cached_image_view_ids[index]; ImageViewId& image_view_id = cached_image_view_ids[index];
if (is_new) { if (is_new) {
image_view_id = FindImageView(descriptor); image_view_id = FindImageView(descriptor);
@ -1971,8 +1959,7 @@ void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
set.insert(id); set.insert(id);
}; };
ankerl::unordered_dense::set<SamplerId> active; ankerl::unordered_dense::set<SamplerId> active;
active.reserve(channel_state->graphics_sampler_ids.size() + active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size());
channel_state->compute_sampler_ids.size());
for (const SamplerId id : channel_state->graphics_sampler_ids) { for (const SamplerId id : channel_state->graphics_sampler_ids) {
mark_active(active, id); mark_active(active, id);
} }

View file

@ -17,6 +17,7 @@
#include <ankerl/unordered_dense.h> #include <ankerl/unordered_dense.h>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include <queue> #include <queue>
#include "common/common_types.h" #include "common/common_types.h"
@ -76,22 +77,22 @@ public:
TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete; TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete; TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; DescriptorTable<TICEntry> graphics_image_table;
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; DescriptorTable<TSCEntry> graphics_sampler_table;
std::vector<SamplerId> graphics_sampler_ids; DescriptorTable<TICEntry> compute_image_table;
std::vector<ImageViewId> graphics_image_view_ids; DescriptorTable<TSCEntry> compute_sampler_table;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
// TODO: still relies on bad iterators :( // TODO: still relies on bad iterators :(
std::unordered_map<TICEntry, ImageViewId> image_views; std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers; std::unordered_map<TSCEntry, SamplerId> samplers;
TextureCacheGPUMap* gpu_page_table; std::vector<SamplerId> graphics_sampler_ids;
TextureCacheGPUMap* sparse_page_table; std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
std::vector<ImageViewId> compute_image_view_ids;
TextureCacheGPUMap* gpu_page_table = nullptr;
TextureCacheGPUMap* sparse_page_table = nullptr;
}; };
template <class P> template <class P>