mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-27 17:58:59 +02:00
[video_core] fix redundant resize-copy overload and just use default-init resize, to reduce stutter on Mario BP
Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
91058d7383
commit
807b521658
7 changed files with 80 additions and 104 deletions
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
|
|
@ -52,7 +52,7 @@ public:
|
|||
++id.index;
|
||||
} while (id.index < size && !IsValid(bitset));
|
||||
if (id.index == size) {
|
||||
id.index = SlotId::INVALID_INDEX;
|
||||
id = SlotId{};
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
|
|
@ -141,7 +141,7 @@ public:
|
|||
}
|
||||
|
||||
[[nodiscard]] Iterator end() noexcept {
|
||||
return Iterator(this, SlotId{SlotId::INVALID_INDEX});
|
||||
return Iterator(this, SlotId{});
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t size() const noexcept {
|
||||
|
|
|
|||
|
|
@ -486,27 +486,17 @@ void TouchResource::ReadTouchInput() {
|
|||
SanitizeInput(current_touch_state);
|
||||
|
||||
std::scoped_lock lock{*input_mutex};
|
||||
if (current_touch_state.entry_count == previous_touch_state.entry_count) {
|
||||
if (current_touch_state.entry_count < 1) {
|
||||
return;
|
||||
}
|
||||
if (current_touch_state.entry_count == previous_touch_state.entry_count && current_touch_state.entry_count >= 1) {
|
||||
bool has_moved = false;
|
||||
for (std::size_t i = 0; i < static_cast<std::size_t>(current_touch_state.entry_count);
|
||||
i++) {
|
||||
s32 delta_x = std::abs(static_cast<s32>(current_touch_state.states[i].position.x) -
|
||||
static_cast<s32>(previous_touch_state.states[i].position.x));
|
||||
s32 delta_y = std::abs(static_cast<s32>(current_touch_state.states[i].position.y) -
|
||||
static_cast<s32>(previous_touch_state.states[i].position.y));
|
||||
if (delta_x > 1 || delta_y > 1) {
|
||||
has_moved = true;
|
||||
}
|
||||
for (std::size_t i = 0; !has_moved && i < std::size_t(current_touch_state.entry_count); i++) {
|
||||
s32 delta_x = std::abs(s32(current_touch_state.states[i].position.x) - s32(previous_touch_state.states[i].position.x));
|
||||
s32 delta_y = std::abs(s32(current_touch_state.states[i].position.y) - s32(previous_touch_state.states[i].position.y));
|
||||
has_moved |= (delta_x > 1 || delta_y > 1);
|
||||
}
|
||||
if (!has_moved) {
|
||||
return;
|
||||
if (has_moved) {
|
||||
input_event->Signal();
|
||||
}
|
||||
}
|
||||
|
||||
input_event->Signal();
|
||||
}
|
||||
|
||||
void TouchResource::OnTouchUpdate(s64 timestamp) {
|
||||
|
|
|
|||
|
|
@ -314,8 +314,8 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
|
|||
|
||||
template <typename Spec>
|
||||
bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||
small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views;
|
||||
small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers;
|
||||
boost::container::small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views;
|
||||
boost::container::small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers;
|
||||
views.reserve(num_image_elements);
|
||||
samplers.reserve(num_textures);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
|
@ -6,37 +9,39 @@
|
|||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "common/assert.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <typename Descriptor>
|
||||
template <typename T>
|
||||
class DescriptorTable {
|
||||
public:
|
||||
explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
|
||||
|
||||
[[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
|
||||
[[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; }
|
||||
Refresh(gpu_addr, limit);
|
||||
return true;
|
||||
[[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) noexcept {
|
||||
bool ret = !(current_gpu_addr == gpu_addr && current_limit == limit);
|
||||
if (ret) {
|
||||
Refresh(gpu_addr, limit);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Invalidate() noexcept {
|
||||
std::ranges::fill(read_descriptors, 0);
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
|
||||
[[nodiscard]] std::pair<T, bool> Read(Tegra::MemoryManager const& gpu_memory, u32 index) noexcept {
|
||||
DEBUG_ASSERT(index <= current_limit);
|
||||
const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
|
||||
std::pair<Descriptor, bool> result;
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
|
||||
if (IsDescriptorRead(index)) {
|
||||
const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(T);
|
||||
std::pair<T, bool> result;
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, std::addressof(result.first), sizeof(T));
|
||||
if ((read_descriptors[index / 64] & (1ULL << (index % 64))) != 0) {
|
||||
result.second = result.first != descriptors[index];
|
||||
} else {
|
||||
MarkDescriptorAsRead(index);
|
||||
read_descriptors[index / 64] |= 1ULL << (index % 64);
|
||||
result.second = true;
|
||||
}
|
||||
if (result.second) {
|
||||
|
|
@ -45,34 +50,24 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 Limit() const noexcept {
|
||||
return current_limit;
|
||||
}
|
||||
|
||||
private:
|
||||
void Refresh(GPUVAddr gpu_addr, u32 limit) {
|
||||
void Refresh(GPUVAddr gpu_addr, u32 limit) noexcept {
|
||||
current_gpu_addr = gpu_addr;
|
||||
current_limit = limit;
|
||||
|
||||
const size_t num_descriptors = static_cast<size_t>(limit) + 1;
|
||||
read_descriptors.clear();
|
||||
read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
|
||||
// Mario Brothership triggers reallocation very frequently, so use aggressive pre-allocation sizes
|
||||
// std::vector<T> grows its capacity geometrically by default, but even that isn't enough to satisfy Brothership
|
||||
const size_t num_descriptors = ((limit + 0x80000) & (~0x7ffff)) + 1;
|
||||
size_t old_size = read_descriptors.size();
|
||||
read_descriptors.resize(Common::DivCeil(num_descriptors, 64U));
|
||||
old_size = (std::min)(old_size, read_descriptors.size());
|
||||
std::fill(read_descriptors.begin(), read_descriptors.begin() + old_size, 0);
|
||||
//
|
||||
descriptors.resize(num_descriptors);
|
||||
}
|
||||
|
||||
void MarkDescriptorAsRead(u32 index) noexcept {
|
||||
read_descriptors[index / 64] |= 1ULL << (index % 64);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
|
||||
return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
|
||||
}
|
||||
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
std::vector<u64> read_descriptors;
|
||||
std::vector<T> descriptors;
|
||||
GPUVAddr current_gpu_addr{};
|
||||
u32 current_limit{};
|
||||
std::vector<u64> read_descriptors;
|
||||
std::vector<Descriptor> descriptors;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
|
|
@ -7,8 +10,8 @@
|
|||
namespace VideoCommon {
|
||||
|
||||
TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept
|
||||
: ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory},
|
||||
compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {}
|
||||
: ChannelInfo(state)
|
||||
{}
|
||||
|
||||
template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>;
|
||||
|
||||
|
|
|
|||
|
|
@ -262,8 +262,7 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
|
|||
|
||||
template <class P>
|
||||
typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
|
||||
const auto image_view_id = VisitImageView(channel_state->graphics_image_table,
|
||||
channel_state->graphics_image_view_ids, index);
|
||||
const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index);
|
||||
return slot_image_views[image_view_id];
|
||||
}
|
||||
|
||||
|
|
@ -275,14 +274,12 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
|
|||
template <class P>
|
||||
template <bool has_blacklists>
|
||||
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
|
||||
FillImageViews<has_blacklists>(channel_state->graphics_image_table,
|
||||
channel_state->graphics_image_view_ids, views);
|
||||
FillImageViews<has_blacklists>(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
|
||||
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids,
|
||||
views);
|
||||
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids, views);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
|
@ -361,29 +358,27 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
|
|||
|
||||
template <class P>
|
||||
SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
|
||||
if (index > channel_state->graphics_sampler_table.Limit()) {
|
||||
if (index > channel_state->graphics_sampler_table.current_limit) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
|
||||
return NULL_SAMPLER_ID;
|
||||
}
|
||||
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
|
||||
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index);
|
||||
SamplerId& id = channel_state->graphics_sampler_ids[index];
|
||||
if (is_new) {
|
||||
if (is_new)
|
||||
id = FindSampler(descriptor);
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
|
||||
if (index > channel_state->compute_sampler_table.Limit()) {
|
||||
if (index > channel_state->compute_sampler_table.current_limit) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
|
||||
return NULL_SAMPLER_ID;
|
||||
}
|
||||
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
|
||||
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index);
|
||||
SamplerId& id = channel_state->compute_sampler_ids[index];
|
||||
if (is_new) {
|
||||
if (is_new)
|
||||
id = FindSampler(descriptor);
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
|
|
@ -399,19 +394,16 @@ typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {
|
|||
|
||||
template <class P>
|
||||
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
|
||||
using SamplerBinding = Tegra::Engines::Maxwell3D::Regs::SamplerBinding;
|
||||
const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding;
|
||||
const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
|
||||
const u32 tic_limit = maxwell3d->regs.tex_header.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
|
||||
bool bindings_changed = false;
|
||||
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(),
|
||||
tsc_limit)) {
|
||||
channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) {
|
||||
channel_state->graphics_sampler_ids.resize(tsc_limit + 1);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(),
|
||||
tic_limit)) {
|
||||
channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) {
|
||||
channel_state->graphics_image_view_ids.resize(tic_limit + 1);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (bindings_changed) {
|
||||
|
|
@ -424,15 +416,13 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
|
|||
const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
|
||||
const u32 tic_limit = kepler_compute->regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
|
||||
const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
|
||||
bool bindings_changed = false;
|
||||
if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) {
|
||||
channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) {
|
||||
channel_state->compute_sampler_ids.resize(tsc_limit + 1);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(),
|
||||
tic_limit)) {
|
||||
channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) {
|
||||
channel_state->compute_image_view_ids.resize(tic_limit + 1);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (bindings_changed) {
|
||||
|
|
@ -640,14 +630,12 @@ void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
|
|||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
|
||||
std::span<ImageViewId> cached_image_view_ids,
|
||||
u32 index) {
|
||||
if (index > table.Limit()) {
|
||||
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) {
|
||||
if (index > table.current_limit) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
const auto [descriptor, is_new] = table.Read(index);
|
||||
const auto [descriptor, is_new] = table.Read(*gpu_memory, index);
|
||||
ImageViewId& image_view_id = cached_image_view_ids[index];
|
||||
if (is_new) {
|
||||
image_view_id = FindImageView(descriptor);
|
||||
|
|
@ -2086,8 +2074,7 @@ void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
|
|||
set.insert(id);
|
||||
};
|
||||
ankerl::unordered_dense::set<SamplerId> active;
|
||||
active.reserve(channel_state->graphics_sampler_ids.size() +
|
||||
channel_state->compute_sampler_ids.size());
|
||||
active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size());
|
||||
for (const SamplerId id : channel_state->graphics_sampler_ids) {
|
||||
mark_active(active, id);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
#include <ankerl/unordered_dense.h>
|
||||
#include <vector>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <queue>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
|
@ -76,22 +77,22 @@ public:
|
|||
TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
|
||||
TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
|
||||
|
||||
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> graphics_sampler_ids;
|
||||
std::vector<ImageViewId> graphics_image_view_ids;
|
||||
|
||||
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> compute_sampler_ids;
|
||||
std::vector<ImageViewId> compute_image_view_ids;
|
||||
DescriptorTable<TICEntry> graphics_image_table;
|
||||
DescriptorTable<TSCEntry> graphics_sampler_table;
|
||||
DescriptorTable<TICEntry> compute_image_table;
|
||||
DescriptorTable<TSCEntry> compute_sampler_table;
|
||||
|
||||
// TODO: still relies on bad iterators :(
|
||||
std::unordered_map<TICEntry, ImageViewId> image_views;
|
||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||
|
||||
TextureCacheGPUMap* gpu_page_table;
|
||||
TextureCacheGPUMap* sparse_page_table;
|
||||
std::vector<SamplerId> graphics_sampler_ids;
|
||||
std::vector<SamplerId> compute_sampler_ids;
|
||||
std::vector<ImageViewId> graphics_image_view_ids;
|
||||
std::vector<ImageViewId> compute_image_view_ids;
|
||||
|
||||
TextureCacheGPUMap* gpu_page_table = nullptr;
|
||||
TextureCacheGPUMap* sparse_page_table = nullptr;
|
||||
};
|
||||
|
||||
template <class P>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue