Compare commits

...

12 commits

9 changed files with 205 additions and 110 deletions

View file

@ -43,6 +43,38 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
using namespace Common::Literals; using namespace Common::Literals;
constexpr u32 StackSize = 128_KiB; constexpr u32 StackSize = 128_KiB;
constexpr u64 SplitPageAccessWindow = 64;
constexpr size_t MaxPreciseAccessPages = 256;
constexpr u8 MaxPreciseAccessPageWeight = 4;
// Rounds an address down to the start of its guest page.
[[nodiscard]] constexpr u64 AlignDownPage(u64 addr) {
    const u64 page_mask = u64{Memory::YUZU_PAGEMASK};
    return addr & ~page_mask;
}
// True when `addr` lies within SplitPageAccessWindow bytes of either edge of
// its page, i.e. an access starting there may straddle a page boundary.
[[nodiscard]] bool IsNearPageBoundary(u64 addr) {
    const u64 offset_in_page = addr & Memory::YUZU_PAGEMASK;
    // `offset + Window > PageSize` is equivalent to `PageSize - offset < Window`
    // (offset is always < PageSize, so the subtraction cannot underflow).
    const u64 bytes_to_page_end = Memory::YUZU_PAGESIZE - offset_in_page;
    return offset_in_page < SplitPageAccessWindow || bytes_to_page_end < SplitPageAccessWindow;
}
// Checks whether `fault_addr` falls within one page of the thread-local
// region based at `tls_base`. A zero base means no TLS region is installed.
[[nodiscard]] bool IsNearTlsWindow(u64 tls_base, u64 fault_addr) {
    if (tls_base == 0) {
        return false;
    }
    const u64 first_tls_page = AlignDownPage(tls_base);
    const u64 last_tls_page = AlignDownPage(tls_base + Kernel::Svc::ThreadLocalRegionSize - 1);
    const u64 fault_page = AlignDownPage(fault_addr);
    // The TLS span is widened by one page on each side. The comparisons add
    // the page size to the smaller operand so nothing can underflow near
    // address zero.
    const bool below_window = fault_page + Memory::YUZU_PAGESIZE < first_tls_page;
    const bool above_window = fault_page > last_tls_page + Memory::YUZU_PAGESIZE;
    return !below_window && !above_window;
}
// Decides whether a guest access fault should prefer the precise channel
// (single-stepping the faulting instruction): near a page boundary, or near
// either of the thread's TLS windows.
[[nodiscard]] bool ShouldUsePreciseAccessChannel(const GuestContext* guest_ctx, u64 fault_addr) {
    if (IsNearPageBoundary(fault_addr)) {
        return true;
    }
    return IsNearTlsWindow(guest_ctx->tpidrro_el0, fault_addr) ||
           IsNearTlsWindow(guest_ctx->tpidr_el0, fault_addr);
}
} // namespace } // namespace
@ -158,18 +190,46 @@ bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info,
} }
bool ArmNce::HandleGuestAccessFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) { bool ArmNce::HandleGuestAccessFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
auto* fpctx = GetFloatingPointState(host_ctx);
auto* info = static_cast<siginfo_t*>(raw_info); auto* info = static_cast<siginfo_t*>(raw_info);
auto* parent = guest_ctx->parent;
// Try to handle an invalid access. const u64 fault_addr = reinterpret_cast<u64>(info->si_addr);
// TODO: handle accesses which split a page? const Common::ProcessAddress addr = fault_addr & ~Memory::YUZU_PAGEMASK;
const Common::ProcessAddress addr = const u64 page_offset = fault_addr & Memory::YUZU_PAGEMASK;
(reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK); auto& memory = parent->m_running_thread->GetOwnerProcess()->GetMemory();
auto& memory = guest_ctx->parent->m_running_thread->GetOwnerProcess()->GetMemory(); const bool prefer_precise_channel = ShouldUsePreciseAccessChannel(guest_ctx, fault_addr) ||
if (memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) { parent->IsPreciseAccessPage(fault_addr);
if (prefer_precise_channel) {
if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) {
parent->MarkPreciseAccessFaultWindow(fault_addr);
host_ctx.pc = *next_pc;
return true;
}
}
bool handled = memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE);
if (page_offset < SplitPageAccessWindow && addr >= Memory::YUZU_PAGESIZE) {
handled |= memory.InvalidateNCE(addr - Memory::YUZU_PAGESIZE, Memory::YUZU_PAGESIZE);
}
if (page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE) {
handled |= memory.InvalidateNCE(addr + Memory::YUZU_PAGESIZE, Memory::YUZU_PAGESIZE);
}
if (handled) {
// We handled the access successfully and are returning to guest code. // We handled the access successfully and are returning to guest code.
return true; return true;
} }
if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) {
parent->MarkPreciseAccessFaultWindow(fault_addr);
host_ctx.pc = *next_pc;
return true;
}
// We couldn't handle the access. // We couldn't handle the access.
return HandleFailedGuestFault(guest_ctx, raw_info, raw_context); return HandleFailedGuestFault(guest_ctx, raw_info, raw_context);
} }
@ -182,6 +242,53 @@ void ArmNce::HandleHostAccessFault(int sig, void* raw_info, void* raw_context) {
return g_orig_segv_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context); return g_orig_segv_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
} }
// Returns true if `addr` lies on a page previously marked for the
// precise-access fault path. Takes m_precise_pages_guard internally.
bool ArmNce::IsPreciseAccessPage(u64 addr) const {
    const std::scoped_lock lk{m_precise_pages_guard};
    const auto it = m_precise_pages.find(AlignDownPage(addr));
    return it != m_precise_pages.end();
}
// Records `addr`'s page as a precise-access page. An already-tracked page has
// its weight bumped (capped at MaxPreciseAccessPageWeight); a new page is
// inserted with weight 1 after decaying the table down below capacity.
void ArmNce::MarkPreciseAccessPage(u64 addr) {
    const std::scoped_lock lk{m_precise_pages_guard};
    const u64 page = AlignDownPage(addr);
    const auto it = m_precise_pages.find(page);
    if (it != m_precise_pages.end()) {
        const u8 bumped = static_cast<u8>(it->second + 1);
        it->second = std::min<u8>(MaxPreciseAccessPageWeight, bumped);
        return;
    }
    // Each decay pass lowers every weight by one and erases weight-1 entries,
    // so this loop finishes after at most MaxPreciseAccessPageWeight passes.
    while (m_precise_pages.size() >= MaxPreciseAccessPages) {
        DecayPreciseAccessPagesLocked();
    }
    m_precise_pages.emplace(page, u8{1});
}
// Marks the faulting page as precise; when the access sits close to a page
// edge, the neighboring page is marked as well so a straddling access is
// also routed through the precise channel.
void ArmNce::MarkPreciseAccessFaultWindow(u64 addr) {
    MarkPreciseAccessPage(addr);
    if (!IsNearPageBoundary(addr)) {
        return;
    }
    const u64 offset_in_page = addr & Memory::YUZU_PAGEMASK;
    const bool near_low_edge = offset_in_page < SplitPageAccessWindow;
    const bool near_high_edge = offset_in_page + SplitPageAccessWindow > Memory::YUZU_PAGESIZE;
    // The low-edge neighbor is only marked when one actually exists below.
    if (near_low_edge && addr >= Memory::YUZU_PAGESIZE) {
        MarkPreciseAccessPage(addr - Memory::YUZU_PAGESIZE);
    }
    if (near_high_edge) {
        MarkPreciseAccessPage(addr + Memory::YUZU_PAGESIZE);
    }
}
// Ages every tracked page by one weight unit, erasing pages whose weight
// would reach zero. Caller must already hold m_precise_pages_guard.
void ArmNce::DecayPreciseAccessPagesLocked() {
    auto it = m_precise_pages.begin();
    while (it != m_precise_pages.end()) {
        if (it->second <= 1) {
            // erase() returns the iterator to the next live element.
            it = m_precise_pages.erase(it);
        } else {
            --it->second;
            ++it;
        }
    }
}
void ArmNce::LockThread(Kernel::KThread* thread) { void ArmNce::LockThread(Kernel::KThread* thread) {
auto* thread_params = &thread->GetNativeExecutionParameters(); auto* thread_params = &thread->GetNativeExecutionParameters();
LockThreadParameters(thread_params); LockThreadParameters(thread_params);

View file

@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <mutex> #include <mutex>
#include <unordered_map>
#include "core/arm/arm_interface.h" #include "core/arm/arm_interface.h"
#include "core/arm/nce/guest_context.h" #include "core/arm/nce/guest_context.h"
@ -77,6 +81,11 @@ private:
static void HandleHostAlignmentFault(int sig, void* info, void* raw_context); static void HandleHostAlignmentFault(int sig, void* info, void* raw_context);
static void HandleHostAccessFault(int sig, void* info, void* raw_context); static void HandleHostAccessFault(int sig, void* info, void* raw_context);
bool IsPreciseAccessPage(u64 addr) const;
void MarkPreciseAccessPage(u64 addr);
void MarkPreciseAccessFaultWindow(u64 addr);
void DecayPreciseAccessPagesLocked();
public: public:
Core::System& m_system; Core::System& m_system;
@ -88,6 +97,9 @@ public:
GuestContext m_guest_ctx{}; GuestContext m_guest_ctx{};
Kernel::KThread* m_running_thread{}; Kernel::KThread* m_running_thread{};
mutable std::mutex m_precise_pages_guard{};
std::unordered_map<u64, u8> m_precise_pages{};
// Stack for signal processing. // Stack for signal processing.
std::unique_ptr<u8[]> m_stack{}; std::unique_ptr<u8[]> m_stack{};
}; };

View file

@ -51,37 +51,6 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell&
state.varyings = regs.stream_out_layout; state.varyings = regs.stream_out_layout;
} }
// Collapses a guest primitive topology onto the canonical representative of
// its dynamic-state topology class (points, lines, triangles, the adjacency
// variants, or patches).
Maxwell::PrimitiveTopology NormalizeDynamicTopologyClass(Maxwell::PrimitiveTopology topology) {
    using Topology = Maxwell::PrimitiveTopology;
    switch (topology) {
    case Topology::Points:
        return Topology::Points;
    case Topology::Lines:
    case Topology::LineStrip:
        return Topology::Lines;
    case Topology::Triangles:
    case Topology::TriangleStrip:
    case Topology::TriangleFan:
    case Topology::Quads:
    case Topology::QuadStrip:
    case Topology::Polygon:
    case Topology::LineLoop:
        return Topology::Triangles;
    case Topology::LinesAdjacency:
    case Topology::LineStripAdjacency:
        return Topology::LinesAdjacency;
    case Topology::TrianglesAdjacency:
    case Topology::TriangleStripAdjacency:
        return Topology::TrianglesAdjacency;
    case Topology::Patches:
        return Topology::Patches;
    }
    // Unlisted enumerator values pass through unchanged.
    return topology;
}
} // Anonymous namespace } // Anonymous namespace
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features) { void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features) {
@ -103,9 +72,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() == tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
Maxwell::Tessellation::OutputPrimitives::Triangles_CW); Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
patch_control_points_minus_one.Assign(regs.patch_vertices - 1); patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
const bool can_normalize_topology = topology.Assign(topology_);
features.has_extended_dynamic_state && features.has_extended_dynamic_state_2;
topology.Assign(can_normalize_topology ? NormalizeDynamicTopologyClass(topology_) : topology_);
msaa_mode.Assign(regs.anti_alias_samples_mode); msaa_mode.Assign(regs.anti_alias_samples_mode);
raw2 = 0; raw2 = 0;
@ -164,15 +131,11 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
} }
dynamic_state.raw1 = 0; dynamic_state.raw1 = 0;
dynamic_state.raw2 = 0; dynamic_state.raw2 = 0;
if (!extended_dynamic_state) { dynamic_state.Refresh(regs);
dynamic_state.Refresh(regs);
}
std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) { std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) {
return static_cast<u16>(array.stride.Value()); return static_cast<u16>(array.stride.Value());
}); });
if (!extended_dynamic_state_2) { dynamic_state.Refresh2(regs, topology_, false);
dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2);
}
if (maxwell3d.dirty.flags[Dirty::Blending]) { if (maxwell3d.dirty.flags[Dirty::Blending]) {
maxwell3d.dirty.flags[Dirty::Blending] = false; maxwell3d.dirty.flags[Dirty::Blending] = false;
for (size_t index = 0; index < attachments.size(); ++index) { for (size_t index = 0; index < attachments.size(); ++index) {

View file

@ -254,11 +254,9 @@ struct FixedPipelineState {
// When transform feedback is enabled, use the whole struct // When transform feedback is enabled, use the whole struct
return sizeof(*this); return sizeof(*this);
} }
if (extended_dynamic_state) { // Always include the cached dynamic-state payload in the key. Some members of
// Exclude dynamic state // `dynamic_state` still feed static pipeline state even when EDS is enabled,
return offsetof(FixedPipelineState, vertex_strides); // and excluding the whole block causes incorrect pipeline reuse.
}
// Default
return offsetof(FixedPipelineState, xfb_state); return offsetof(FixedPipelineState, xfb_state);
} }
}; };

View file

@ -101,6 +101,37 @@ bool IsLine(VkPrimitiveTopology topology) {
return std::ranges::find(line_topologies, topology) != line_topologies.end(); return std::ranges::find(line_topologies, topology) != line_topologies.end();
} }
// Maps a Vulkan topology to the list topology representing its dynamic
// topology class, so pipelines within one class can be shared when the exact
// topology is supplied dynamically.
VkPrimitiveTopology DynamicTopologyClassRepresentative(VkPrimitiveTopology topology) {
    switch (topology) {
    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
    default:
        // Point and patch lists are singleton classes; any other value is
        // already its own representative.
        return topology;
    }
}
// Determines whether primitiveRestartEnable may be baked statically into the
// pipeline for the given topology on this device.
bool SupportsStaticPrimitiveRestart(const Device& device, VkPrimitiveTopology topology) {
    if (topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
        // Patch lists need the dedicated device capability.
        return device.IsPatchListPrimitiveRestartSupported();
    }
    if (SupportsPrimitiveRestart(topology)) {
        return true;
    }
    return device.IsTopologyListPrimitiveRestartSupported();
}
VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
union Swizzle { union Swizzle {
u32 raw; u32 raw;
@ -531,12 +562,6 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
} }
const void* const descriptor_data{guest_descriptor_queue.UpdateData()}; const void* const descriptor_data{guest_descriptor_queue.UpdateData()};
FixedPipelineState::DynamicState dynamic_state{};
if (!key.state.extended_dynamic_state) {
dynamic_state = key.state.dynamic_state;
} else {
dynamic_state.raw1 = key.state.dynamic_state.raw1;
}
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
is_rescaling, update_rescaling, is_rescaling, update_rescaling,
uses_render_area = render_area.uses_render_area, uses_render_area = render_area.uses_render_area,
@ -576,12 +601,7 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
} }
void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
FixedPipelineState::DynamicState dynamic{}; const FixedPipelineState::DynamicState dynamic{key.state.dynamic_state};
if (!key.state.extended_dynamic_state) {
dynamic = key.state.dynamic_state;
} else {
dynamic.raw1 = key.state.dynamic_state.raw1;
}
static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
@ -635,11 +655,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
vertex_input_ci.pNext = &input_divisor_ci; vertex_input_ci.pNext = &input_divisor_ci;
} }
const bool has_tess_stages = spv_modules[1] || spv_modules[2]; const bool has_tess_stages = spv_modules[1] || spv_modules[2];
auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); const bool dynamic_topology = key.state.extended_dynamic_state != 0;
if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { const bool dynamic_primitive_restart = key.state.extended_dynamic_state_2 != 0;
auto exact_input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
if (exact_input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
if (!has_tess_stages) { if (!has_tess_stages) {
LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; exact_input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
} }
} else { } else {
if (has_tess_stages) { if (has_tess_stages) {
@ -647,25 +669,29 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
// shader stages. Forcing it fixes a crash on some drivers // shader stages. Forcing it fixes a crash on some drivers
LOG_WARNING(Render_Vulkan, LOG_WARNING(Render_Vulkan,
"Patch topology not used with tessellation, using patch list"); "Patch topology not used with tessellation, using patch list");
input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; exact_input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
} }
} }
const VkPrimitiveTopology input_assembly_topology =
dynamic_topology && dynamic_primitive_restart
? DynamicTopologyClassRepresentative(exact_input_assembly_topology)
: exact_input_assembly_topology;
const VkBool32 primitive_restart_enable =
// MoltenVK/Metal always has primitive restart enabled and cannot disable it
device.IsMoltenVK()
? VK_TRUE
: (dynamic_primitive_restart
? VK_FALSE
: (dynamic.primitive_restart_enable != 0 &&
SupportsStaticPrimitiveRestart(device, input_assembly_topology)
? VK_TRUE
: VK_FALSE));
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.topology = input_assembly_topology, .topology = input_assembly_topology,
.primitiveRestartEnable = .primitiveRestartEnable = primitive_restart_enable,
// MoltenVK/Metal always has primitive restart enabled and cannot disable it
device.IsMoltenVK() ? VK_TRUE :
(dynamic.primitive_restart_enable != 0 &&
((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
device.IsTopologyListPrimitiveRestartSupported()) ||
SupportsPrimitiveRestart(input_assembly_topology) ||
(input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
device.IsPatchListPrimitiveRestartSupported()))
? VK_TRUE
: VK_FALSE),
}; };
const VkPipelineTessellationStateCreateInfo tessellation_ci{ const VkPipelineTessellationStateCreateInfo tessellation_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,

View file

@ -751,19 +751,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key, descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
std::move(modules), infos); std::move(modules), infos);
} catch (const vk::Exception& exception) {
const auto hash = key.Hash();
LOG_ERROR(
Render_Vulkan,
"Failed to create graphics pipeline 0x{:016x}: {} (result={}, eds={}, eds2={}, "
"eds2_logic_op={}, topology={}, provoking_last={}, xfb={}, conservative={})",
hash, exception.what(), static_cast<int>(exception.GetResult()),
key.state.extended_dynamic_state != 0, key.state.extended_dynamic_state_2 != 0,
key.state.extended_dynamic_state_2_logic_op != 0, static_cast<u32>(key.state.topology.Value()),
key.state.provoking_vertex_last != 0, key.state.xfb_enabled != 0,
key.state.conservative_raster_enable != 0);
return nullptr;
} catch (const Shader::Exception& exception) { } catch (const Shader::Exception& exception) {
auto hash = key.Hash(); auto hash = key.Hash();
size_t env_index{0}; size_t env_index{0};

View file

@ -277,10 +277,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
if (!pipeline->Configure(is_indexed)) if (!pipeline->Configure(is_indexed))
return; return;
if (pipeline->UsesExtendedDynamicState()) {
state_tracker.InvalidateStateEnableFlag();
}
HandleTransformFeedback(); HandleTransformFeedback();
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable); maxwell3d->regs.zpass_pixel_count_enable);
@ -1078,19 +1074,18 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateLineWidth(regs); UpdateLineWidth(regs);
UpdateLineStipple(regs); UpdateLineStipple(regs);
// EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest // EDS1: CullMode, DepthCompare, FrontFace, PrimitiveTopology, StencilOp,
// DepthBoundsTest, DepthTest, DepthWrite, StencilTest
if (device.IsExtExtendedDynamicStateSupported() && pipeline && pipeline->UsesExtendedDynamicState()) { if (device.IsExtExtendedDynamicStateSupported() && pipeline && pipeline->UsesExtendedDynamicState()) {
UpdateCullMode(regs); UpdateCullMode(regs);
UpdateDepthCompareOp(regs); UpdateDepthCompareOp(regs);
UpdateFrontFace(regs); UpdateFrontFace(regs);
UpdatePrimitiveTopology(regs); UpdatePrimitiveTopology(regs);
UpdateStencilOp(regs); UpdateStencilOp(regs);
if (state_tracker.TouchStateEnable()) { UpdateDepthBoundsTestEnable(regs);
UpdateDepthBoundsTestEnable(regs); UpdateDepthTestEnable(regs);
UpdateDepthTestEnable(regs); UpdateDepthWriteEnable(regs);
UpdateDepthWriteEnable(regs); UpdateStencilTestEnable(regs);
UpdateStencilTestEnable(regs);
}
} }
UpdateStencilFaces(regs); UpdateStencilFaces(regs);
@ -1162,7 +1157,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
}; };
GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline();
const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported() && const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported() &&
(!pipeline || pipeline->UsesExtendedDynamicState()); pipeline && pipeline->UsesExtendedDynamicState();
scheduler.Record([viewport, use_viewport_with_count](vk::CommandBuffer cmdbuf) { scheduler.Record([viewport, use_viewport_with_count](vk::CommandBuffer cmdbuf) {
if (use_viewport_with_count) { if (use_viewport_with_count) {
std::array viewports{viewport}; std::array viewports{viewport};
@ -1187,7 +1182,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
}; };
GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline();
const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported() && const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported() &&
(!pipeline || pipeline->UsesExtendedDynamicState()); pipeline && pipeline->UsesExtendedDynamicState();
scheduler.Record([this, viewport_list, use_viewport_with_count](vk::CommandBuffer cmdbuf) { scheduler.Record([this, viewport_list, use_viewport_with_count](vk::CommandBuffer cmdbuf) {
const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports); const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
const vk::Span<VkViewport> viewports(viewport_list.data(), num_viewports); const vk::Span<VkViewport> viewports(viewport_list.data(), num_viewports);
@ -1218,7 +1213,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
scissor.extent.height = height; scissor.extent.height = height;
GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline();
const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported() && const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported() &&
(!pipeline || pipeline->UsesExtendedDynamicState()); pipeline && pipeline->UsesExtendedDynamicState();
scheduler.Record([scissor, use_scissor_with_count](vk::CommandBuffer cmdbuf) { scheduler.Record([scissor, use_scissor_with_count](vk::CommandBuffer cmdbuf) {
if (use_scissor_with_count) { if (use_scissor_with_count) {
std::array scissors{scissor}; std::array scissors{scissor};
@ -1255,7 +1250,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
}; };
GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline();
const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported() && const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported() &&
(!pipeline || pipeline->UsesExtendedDynamicState()); pipeline && pipeline->UsesExtendedDynamicState();
scheduler.Record([this, scissor_list, use_scissor_with_count](vk::CommandBuffer cmdbuf) { scheduler.Record([this, scissor_list, use_scissor_with_count](vk::CommandBuffer cmdbuf) {
const u32 num_scissors = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports); const u32 num_scissors = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
const vk::Span<VkRect2D> scissors(scissor_list.data(), num_scissors); const vk::Span<VkRect2D> scissors(scissor_list.data(), num_scissors);

View file

@ -154,9 +154,11 @@ void Scheduler::RequestOutsideRenderPassOperationContext() {
bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
if (state.graphics_pipeline == pipeline) { if (state.graphics_pipeline == pipeline) {
if (pipeline && pipeline->UsesExtendedDynamicState() && if (pipeline &&
(pipeline->UsesExtendedDynamicState() || pipeline->UsesExtendedDynamicState2() ||
pipeline->UsesExtendedDynamicState2LogicOp()) &&
state.needs_state_enable_refresh) { state.needs_state_enable_refresh) {
state_tracker.InvalidateStateEnableFlag(); state_tracker.InvalidateExtendedDynamicStates();
state.needs_state_enable_refresh = false; state.needs_state_enable_refresh = false;
} }
return false; return false;
@ -173,10 +175,11 @@ bool Scheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
state_tracker.InvalidateExtendedDynamicStates(); state_tracker.InvalidateExtendedDynamicStates();
} }
if (!pipeline->UsesExtendedDynamicState()) { if (!pipeline->UsesExtendedDynamicState() && !pipeline->UsesExtendedDynamicState2() &&
!pipeline->UsesExtendedDynamicState2LogicOp()) {
state.needs_state_enable_refresh = true; state.needs_state_enable_refresh = true;
} else if (state.needs_state_enable_refresh) { } else if (state.needs_state_enable_refresh) {
state_tracker.InvalidateStateEnableFlag(); state_tracker.InvalidateExtendedDynamicStates();
state.needs_state_enable_refresh = false; state.needs_state_enable_refresh = false;
} }

View file

@ -98,9 +98,13 @@ public:
(*flags)[Dirty::Viewports] = true; (*flags)[Dirty::Viewports] = true;
(*flags)[Dirty::Scissors] = true; (*flags)[Dirty::Scissors] = true;
(*flags)[Dirty::CullMode] = true; (*flags)[Dirty::CullMode] = true;
(*flags)[Dirty::DepthBoundsEnable] = true;
(*flags)[Dirty::DepthTestEnable] = true;
(*flags)[Dirty::DepthWriteEnable] = true;
(*flags)[Dirty::DepthCompareOp] = true; (*flags)[Dirty::DepthCompareOp] = true;
(*flags)[Dirty::FrontFace] = true; (*flags)[Dirty::FrontFace] = true;
(*flags)[Dirty::StencilOp] = true; (*flags)[Dirty::StencilOp] = true;
(*flags)[Dirty::StencilTestEnable] = true;
(*flags)[Dirty::StateEnable] = true; (*flags)[Dirty::StateEnable] = true;
(*flags)[Dirty::PrimitiveRestartEnable] = true; (*flags)[Dirty::PrimitiveRestartEnable] = true;
(*flags)[Dirty::RasterizerDiscardEnable] = true; (*flags)[Dirty::RasterizerDiscardEnable] = true;