mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-16 08:48:59 +02:00
[texture_cache, buffer_cache] Added TLS handling + changed command queue for GPU threading. (#3579)
(Merge of #3495 + #3108) This PR simplifies math operations on hot pointers in accesses and requests to the buffer cache and texture cache, removing the previous indirection logic and replacing it with a PoD approach. This ensures less CPU time is spent on the same request, flowing directly into the next stage of the render. Likewise, the command queue currently uses an internal mutex that constrains the flow of data within the GPU threads; we are moving to a single command, and I verified it is better to keep using mutexes instead of an internal mutex plus a mutex per operation, which are resolved by themselves. In simpler words, this aims to improve performance in those games and on those devices where the waits for the next GPU commands were heavier than a single verification. Co-Authored-by: @CamilleLaVey Co-Authored-by: @Lizzie Co-authored-by: CamilleLaVey <camillelavey99@gmail.com> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3579 Reviewed-by: CamilleLaVey <camillelavey99@gmail.com> Co-authored-by: lizzie <lizzie@eden-emu.dev> Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
6f9d025ad2
commit
c9c136bea7
12 changed files with 266 additions and 72 deletions
|
|
@ -291,55 +291,59 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (render_targets_serial == last_feedback_loop_serial &&
|
||||
texture_bindings_serial == last_feedback_texture_serial) {
|
||||
if (last_feedback_loop_result) {
|
||||
runtime.BarrierFeedbackLoop();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (rt_active_mask == 0) {
|
||||
last_feedback_loop_serial = render_targets_serial;
|
||||
last_feedback_texture_serial = texture_bindings_serial;
|
||||
last_feedback_loop_result = false;
|
||||
return;
|
||||
}
|
||||
const u32 depth_bit = 1u << NUM_RT;
|
||||
const bool depth_active = (rt_active_mask & depth_bit) != 0;
|
||||
|
||||
const bool requires_barrier = [&] {
|
||||
for (const auto& view : views) {
|
||||
if (!view.id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_render_target = false;
|
||||
|
||||
for (const auto& ct_view_id : render_targets.color_buffer_ids) {
|
||||
if (ct_view_id && ct_view_id == view.id) {
|
||||
is_render_target = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_render_target && render_targets.depth_buffer_id == view.id) {
|
||||
is_render_target = true;
|
||||
}
|
||||
|
||||
if (is_render_target) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto& image_view = slot_image_views[view.id];
|
||||
|
||||
for (const auto& ct_view_id : render_targets.color_buffer_ids) {
|
||||
if (!ct_view_id) {
|
||||
{
|
||||
bool is_continue = false;
|
||||
for (size_t i = 0; i < 8; ++i)
|
||||
is_continue |= (rt_active_mask & (1u << i)) && view.id == render_targets.color_buffer_ids[i];
|
||||
if (is_continue)
|
||||
continue;
|
||||
}
|
||||
|
||||
auto& ct_view = slot_image_views[ct_view_id];
|
||||
|
||||
if (image_view.image_id == ct_view.image_id) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (render_targets.depth_buffer_id) {
|
||||
auto& zt_view = slot_image_views[render_targets.depth_buffer_id];
|
||||
if (depth_active && view.id == render_targets.depth_buffer_id)
|
||||
continue;
|
||||
|
||||
if (image_view.image_id == zt_view.image_id) {
|
||||
return true;
|
||||
}
|
||||
const ImageId view_image_id = slot_image_views[view.id].image_id;
|
||||
{
|
||||
bool is_continue = false;
|
||||
for (size_t i = 0; i < 8; ++i)
|
||||
is_continue |= (rt_active_mask & (1u << i)) && view_image_id == rt_image_id[i];
|
||||
if (is_continue)
|
||||
continue;
|
||||
}
|
||||
if (depth_active && view_image_id == rt_depth_image_id) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}();
|
||||
|
||||
last_feedback_loop_serial = render_targets_serial;
|
||||
last_feedback_texture_serial = texture_bindings_serial;
|
||||
last_feedback_loop_result = requires_barrier;
|
||||
if (requires_barrier) {
|
||||
runtime.BarrierFeedbackLoop();
|
||||
}
|
||||
|
|
@ -399,13 +403,19 @@ void TextureCache<P>::SynchronizeGraphicsDescriptors() {
|
|||
const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding;
|
||||
const u32 tic_limit = maxwell3d->regs.tex_header.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
|
||||
bool bindings_changed = false;
|
||||
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(),
|
||||
tsc_limit)) {
|
||||
channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(),
|
||||
tic_limit)) {
|
||||
channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (bindings_changed) {
|
||||
++texture_bindings_serial;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -415,12 +425,18 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
|
|||
const u32 tic_limit = kepler_compute->regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
|
||||
const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
|
||||
bool bindings_changed = false;
|
||||
if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) {
|
||||
channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(),
|
||||
tic_limit)) {
|
||||
channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (bindings_changed) {
|
||||
++texture_bindings_serial;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -534,6 +550,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||
return;
|
||||
}
|
||||
|
||||
const VideoCommon::RenderTargets previous_render_targets = render_targets;
|
||||
const bool rescaled = RescaleRenderTargets();
|
||||
if (is_rescaling != rescaled) {
|
||||
flags[Dirty::RescaleViewports] = true;
|
||||
|
|
@ -549,6 +566,21 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||
|
||||
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
|
||||
|
||||
rt_active_mask = 0;
|
||||
rt_image_id = {};
|
||||
for (size_t i = 0; i < rt_image_id.size(); ++i) {
|
||||
if (ImageViewId const view = render_targets.color_buffer_ids[i]; view) {
|
||||
rt_active_mask |= 1u << i;
|
||||
rt_image_id[i] = slot_image_views[view].image_id;
|
||||
}
|
||||
}
|
||||
if (depth_buffer_id) {
|
||||
rt_active_mask |= (1u << NUM_RT);
|
||||
rt_depth_image_id = slot_image_views[depth_buffer_id].image_id;
|
||||
} else {
|
||||
rt_depth_image_id = ImageId{};
|
||||
}
|
||||
|
||||
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
|
||||
}
|
||||
|
|
@ -564,12 +596,22 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||
};
|
||||
render_targets.is_rescaled = is_rescaling;
|
||||
|
||||
if (render_targets != previous_render_targets) {
|
||||
++render_targets_serial;
|
||||
}
|
||||
|
||||
flags[Dirty::DepthBiasGlobal] = true;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
|
||||
return &slot_framebuffers[GetFramebufferId(render_targets)];
|
||||
if (last_framebuffer_id && last_framebuffer_serial == render_targets_serial) {
|
||||
return &slot_framebuffers[last_framebuffer_id];
|
||||
}
|
||||
const FramebufferId framebuffer_id = GetFramebufferId(render_targets);
|
||||
last_framebuffer_id = framebuffer_id;
|
||||
last_framebuffer_serial = render_targets_serial;
|
||||
return &slot_framebuffers[framebuffer_id];
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
|
@ -2610,6 +2652,10 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi
|
|||
if (it->first.Contains(removed_views)) {
|
||||
auto framebuffer_id = it->second;
|
||||
ASSERT(framebuffer_id);
|
||||
if (framebuffer_id == last_framebuffer_id) {
|
||||
last_framebuffer_id = {};
|
||||
last_framebuffer_serial = 0;
|
||||
}
|
||||
sentenced_framebuffers.Push(std::move(slot_framebuffers[framebuffer_id]));
|
||||
it = framebuffers.erase(it);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -455,6 +455,16 @@ private:
|
|||
std::deque<TextureCacheGPUMap> gpu_page_table_storage;
|
||||
|
||||
RenderTargets render_targets;
|
||||
u64 render_targets_serial = 0;
|
||||
u32 rt_active_mask = 0;
|
||||
std::array<ImageId, 8> rt_image_id{};
|
||||
ImageId rt_depth_image_id{};
|
||||
u64 texture_bindings_serial = 0;
|
||||
u64 last_feedback_loop_serial = 0;
|
||||
u64 last_feedback_texture_serial = 0;
|
||||
bool last_feedback_loop_result = false;
|
||||
FramebufferId last_framebuffer_id{};
|
||||
u64 last_framebuffer_serial = 0;
|
||||
|
||||
ankerl::unordered_dense::map<RenderTargets, FramebufferId> framebuffers;
|
||||
ankerl::unordered_dense::map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue