Compare commits

..

6 commits

12 changed files with 142 additions and 45 deletions

View file

@ -491,6 +491,9 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
}
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
if (!ctx.runtime_info.active_color_outputs[index]) {
return;
}
const Id component_id{ctx.Const(component)};
const AttributeType type{ctx.runtime_info.color_output_types[index]};
if (type == AttributeType::Float) {

View file

@ -1688,8 +1688,10 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
case Stage::Fragment:
for (u32 index = 0; index < 8; ++index) {
const bool need_dual_source = runtime_info.dual_source_blend && index <= 1;
if (!need_dual_source && !info.stores_frag_color[index] &&
!profile.need_declared_frag_colors) {
const bool should_declare = runtime_info.active_color_outputs[index] &&
(info.stores_frag_color[index] ||
profile.need_declared_frag_colors);
if (!need_dual_source && !should_declare) {
continue;
}
const Id type{GetAttributeType(*this, runtime_info.color_output_types[index])};

View file

@ -142,6 +142,55 @@ FpControlHistogram CollectFpControlHistogram(const IR::Program& program) {
return histogram;
}
/// Logs a trace of every instruction in `program` that carries FP control flags with RZ rounding.
///
/// The post-order blocks are scanned twice. The first scan counts matching instructions per
/// precision bucket (bucket 0 is reported as fp16, bucket 1 as fp32). When nothing matches the
/// function returns without logging. Otherwise a summary line is logged, followed by one line
/// per matching instruction.
///
/// @param env     Supplies the shader start address printed in every log line.
/// @param program Program whose post-order blocks are scanned.
void LogRzFpControlTrace(Environment& env, const IR::Program& program) {
    // Pass 1: count RZ instructions per precision bucket.
    std::array<u32, 2> rz_counts{};
    for (const IR::Block* const block : program.post_order_blocks) {
        for (const IR::Inst& inst : block->Instructions()) {
            const std::optional<size_t> bucket{FpControlBucket(inst.GetOpcode())};
            if (bucket && inst.Flags<IR::FpControl>().rounding == IR::FpRounding::RZ) {
                ++rz_counts[*bucket];
            }
        }
    }
    const bool nothing_to_report{rz_counts[0] == 0 && rz_counts[1] == 0};
    if (nothing_to_report) {
        return;
    }

    LOG_INFO(Shader,
             "FP_RZ {} shader start={:#010x} blocks={} post_order_blocks={} fp16={} fp32={}",
             StageName(program.stage), env.StartAddress(), program.blocks.size(),
             program.post_order_blocks.size(), rz_counts[0], rz_counts[1]);

    // Pass 2: one log line per RZ instruction. The index counts every instruction in the block,
    // including the ones that are filtered out.
    constexpr std::array<std::string_view, 2> bucket_labels{"fp16", "fp32"};
    for (const IR::Block* const block : program.post_order_blocks) {
        u32 next_index{};
        for (const IR::Inst& inst : block->Instructions()) {
            const u32 inst_index{next_index++};
            const std::optional<size_t> bucket{FpControlBucket(inst.GetOpcode())};
            if (!bucket) {
                continue;
            }
            const auto fp_control{inst.Flags<IR::FpControl>()};
            if (fp_control.rounding != IR::FpRounding::RZ) {
                continue;
            }
            LOG_INFO(Shader,
                     "FP_RZ {} start={:#010x} block_order={} inst_index={} precision={} opcode={} no_contraction={} fmz={}",
                     StageName(program.stage), env.StartAddress(), block->GetOrder(), inst_index,
                     bucket_labels[*bucket], inst.GetOpcode(), fp_control.no_contraction,
                     FmzName(fp_control.fmz_mode));
        }
    }
}
void LogFpControlHistogram(const IR::Program& program) {
const FpControlHistogram histogram{CollectFpControlHistogram(program)};
if (histogram.total[0] == 0 && histogram.total[1] == 0) {
@ -479,6 +528,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
if (Settings::values.renderer_debug) {
LogFpControlHistogram(program);
LogRzFpControlTrace(env, program);
}
CollectInterpolationInfo(env, program);
@ -518,6 +568,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b
Optimization::CollectShaderInfoPass(env_vertex_b, result);
if (Settings::values.renderer_debug) {
LogFpControlHistogram(result);
LogRzFpControlTrace(env_vertex_b, result);
}
return result;
}

View file

@ -111,6 +111,9 @@ struct RuntimeInfo {
/// Output types for each color attachment
std::array<AttributeType, 8> color_output_types{};
/// Fragment color outputs that are active for the current pipeline.
std::array<bool, 8> active_color_outputs{true, true, true, true, true, true, true, true};
/// Dual source blending
bool dual_source_blend{};
};

View file

@ -56,16 +56,23 @@ namespace {
/// Builds the Vulkan subresource range covered by an image view.
///
/// Slice views are collapsed to a single layer starting at layer 0. When the view is treated as a
/// 3D image (a 3D view or a slice view) and the range spans exactly one layer, the layer count is
/// replaced with VK_REMAINING_ARRAY_LAYERS.
///
/// @param image_view View whose range, type, flags and format are read.
/// @return Subresource range with the aspect mask derived from the view's format.
[[nodiscard]] VkImageSubresourceRange SubresourceRangeFromView(const ImageView& image_view) {
    auto range = image_view.range;
    // Evaluate the slice flag once; it feeds both the range override and the 3D decision.
    const bool is_slice = (image_view.flags & VideoCommon::ImageViewFlagBits::Slice) !=
                          VideoCommon::ImageViewFlagBits{};
    const bool is_3d_image = image_view.type == VideoCommon::ImageViewType::e3D || is_slice;
    if (is_slice) {
        range.base.layer = 0;
        range.extent.layers = 1;
    }

    u32 layer_count = static_cast<u32>(range.extent.layers);
    if (is_3d_image && layer_count == 1) {
        layer_count = VK_REMAINING_ARRAY_LAYERS;
    }

    return VkImageSubresourceRange{
        .aspectMask = AspectMaskFromFormat(image_view.format),
        .baseMipLevel = static_cast<u32>(range.base.level),
        .levelCount = static_cast<u32>(range.extent.levels),
        .baseArrayLayer = static_cast<u32>(range.base.layer),
        // Use the adjusted count; a second `.layerCount` initializer would be ill-formed.
        .layerCount = layer_count,
    };
}

View file

@ -249,6 +249,17 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
dst_a == F::Source1Alpha_GL || dst_a == F::OneMinusSource1Alpha_GL;
}
for (size_t i = 0; i < info.active_color_outputs.size(); ++i) {
const auto format = static_cast<Tegra::RenderTargetFormat>(key.state.color_formats[i]);
info.active_color_outputs[i] = format != Tegra::RenderTargetFormat::NONE;
}
if (info.dual_source_blend && info.active_color_outputs[0]) {
info.active_color_outputs[1] = true;
}
if (info.alpha_test_func && *info.alpha_test_func != Shader::CompareFunction::Always) {
info.active_color_outputs[0] = true;
}
if (device.IsMoltenVK()) {
for (size_t i = 0; i < 8; ++i) {
const auto format = static_cast<Tegra::RenderTargetFormat>(key.state.color_formats[i]);

View file

@ -1075,7 +1075,6 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthBias(regs);
UpdateBlendConstants(regs);
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
UpdateLineWidth(regs);
UpdateLineStipple(regs);
@ -1094,6 +1093,8 @@ void RasterizerVulkan::UpdateDynamicStates() {
}
}
UpdateStencilFaces(regs);
// EDS2: PrimitiveRestart, RasterizerDiscard, DepthBias enable/disable
if (device.IsExtExtendedDynamicState2Supported() && pipeline && pipeline->UsesExtendedDynamicState2()) {
UpdatePrimitiveRestartEnable(regs);
@ -1640,6 +1641,9 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
if (!state_tracker.TouchStencilTestEnable()) {
return;
}
if (regs.stencil_enable != 0) {
state_tracker.ResetStencilState();
}
scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilTestEnableEXT(enable);
});

View file

@ -170,6 +170,10 @@ public:
return ExchangeCheck(back.compare_mask, new_value) || stencil_reset;
}
/// Sets the stencil_reset flag. Checks that OR this flag into their result (such as the
/// back compare-mask check directly above) then report a change even when the cached value
/// already matches.
void ResetStencilState() {
stencil_reset = true;
}
/// Clears the stencil_reset flag, so checks that OR it into their result depend only on
/// their own value comparison again.
void ClearStencilReset() {
stencil_reset = false;
}

View file

@ -446,15 +446,24 @@ TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t bu
};
}
/// Builds the subresource range used for image barriers.
///
/// Starts from MakeSubresourceRange(aspect_mask, range). When `is_3d_image` is set and that
/// range covers exactly one layer, the layer count becomes VK_REMAINING_ARRAY_LAYERS.
///
/// @param aspect_mask Aspect bits forwarded to MakeSubresourceRange.
/// @param range       Level/layer range forwarded to MakeSubresourceRange.
/// @param is_3d_image Whether the single-layer widening applies.
[[nodiscard]] VkImageSubresourceRange MakeBarrierSubresourceRange(
    VkImageAspectFlags aspect_mask, const SubresourceRange& range, bool is_3d_image) {
    VkImageSubresourceRange result = MakeSubresourceRange(aspect_mask, range);
    const bool covers_single_layer = result.layerCount == 1;
    if (!is_3d_image || !covers_single_layer) {
        return result;
    }
    result.layerCount = VK_REMAINING_ARRAY_LAYERS;
    return result;
}
/// Builds the barrier subresource range for an image view.
///
/// The view counts as a 3D image when its type is e3D or it carries the Slice flag; that
/// decision is forwarded to MakeBarrierSubresourceRange together with the view's range.
///
/// @param image_view View whose range, type, flags and format are read; must not be null.
[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) {
    SubresourceRange range = image_view->range;
    const bool is_slice = True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice);
    const bool is_3d_image = image_view->type == VideoCommon::ImageViewType::e3D || is_slice;
    if (is_slice) {
        // Slice image views always affect a single layer, but their subresource range corresponds
        // to the slice. Override the value to affect a single layer.
        range.base.layer = 0;
        range.extent.layers = 1;
    }
    // Single exit through the barrier-aware helper; an earlier plain return would leave
    // is_3d_image unused and this call unreachable.
    return MakeBarrierSubresourceRange(ImageAspectMask(image_view->format), range, is_3d_image);
}
[[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) {
@ -524,18 +533,23 @@ struct RangedBarrierRange {
max_layer = (std::max)(max_layer, layers.baseArrayLayer + layers.layerCount);
}
/// Converts the accumulated mip/layer bounds into a barrier subresource range.
///
/// The bounds are half-open: extents are computed as max - min for both mip levels and layers.
/// The result is produced by MakeBarrierSubresourceRange, which receives `is_3d_image`.
///
/// @param aspect_mask Aspect bits for the resulting range.
/// @param is_3d_image Forwarded to MakeBarrierSubresourceRange.
VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask,
                                         bool is_3d_image) const noexcept {
    // Qualified type name: inside this method the unqualified name refers to the method itself.
    const VideoCommon::SubresourceRange range{
        .base = {
            .level = static_cast<s32>(min_mip),
            .layer = static_cast<s32>(min_layer),
        },
        .extent = {
            .levels = static_cast<s32>(max_mip - min_mip),
            .layers = static_cast<s32>(max_layer - min_layer),
        },
    };
    return MakeBarrierSubresourceRange(aspect_mask, range, is_3d_image);
}
};
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
VkImageAspectFlags aspect_mask, bool is_initialized,
VkImageAspectFlags aspect_mask, bool is_initialized, bool is_3d_image,
std::span<const VkBufferImageCopy> copies) {
static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
@ -549,7 +563,8 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
for (const auto& region : copies) {
range.AddLayers(region.imageSubresource);
}
const VkImageSubresourceRange subresource_range = range.SubresourceRange(aspect_mask);
const VkImageSubresourceRange subresource_range =
range.SubresourceRange(aspect_mask, is_3d_image);
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@ -1006,9 +1021,12 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
const VkBuffer copy_buffer = GetTemporaryBuffer(total_size);
const VkImage dst_image = dst.Handle();
const VkImage src_image = src.Handle();
const bool dst_is_3d = dst.info.type == ImageType::e3D;
const bool src_is_3d = src.info.type == ImageType::e3D;
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask,
vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) {
dst_is_3d, src_is_3d, vk_in_copies,
vk_out_copies](vk::CommandBuffer cmdbuf) {
RangedBarrierRange dst_range;
RangedBarrierRange src_range;
for (const VkBufferImageCopy& copy : vk_in_copies) {
@ -1042,7 +1060,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange = src_range.SubresourceRange(src_aspect_mask),
.subresourceRange = src_range.SubresourceRange(src_aspect_mask, src_is_3d),
},
};
const std::array middle_in_barrier{
@ -1056,7 +1074,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange = src_range.SubresourceRange(src_aspect_mask),
.subresourceRange = src_range.SubresourceRange(src_aspect_mask, src_is_3d),
},
};
const std::array middle_out_barrier{
@ -1072,7 +1090,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
.subresourceRange = dst_range.SubresourceRange(dst_aspect_mask, dst_is_3d),
},
};
const std::array post_barriers{
@ -1091,7 +1109,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
.subresourceRange = dst_range.SubresourceRange(dst_aspect_mask, dst_is_3d),
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
@ -1440,6 +1458,8 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
const VkImageAspectFlags aspect_mask = dst.AspectMask();
ASSERT(aspect_mask == src.AspectMask());
const bool dst_is_3d = dst.info.type == ImageType::e3D;
const bool src_is_3d = src.info.type == ImageType::e3D;
std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) {
return MakeImageCopy(copy, aspect_mask);
@ -1447,7 +1467,8 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
const VkImage dst_image = dst.Handle();
const VkImage src_image = src.Handle();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
scheduler.Record([dst_image, src_image, aspect_mask, dst_is_3d, src_is_3d,
vk_copies](vk::CommandBuffer cmdbuf) {
RangedBarrierRange dst_range;
RangedBarrierRange src_range;
for (const VkImageCopy& copy : vk_copies) {
@ -1467,7 +1488,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange = src_range.SubresourceRange(aspect_mask),
.subresourceRange = src_range.SubresourceRange(aspect_mask, src_is_3d),
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@ -1481,7 +1502,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange = dst_range.SubresourceRange(aspect_mask),
.subresourceRange = dst_range.SubresourceRange(aspect_mask, dst_is_3d),
},
};
const std::array post_barriers{
@ -1495,7 +1516,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange = src_range.SubresourceRange(aspect_mask),
.subresourceRange = src_range.SubresourceRange(aspect_mask, src_is_3d),
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@ -1512,7 +1533,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange = dst_range.SubresourceRange(aspect_mask),
.subresourceRange = dst_range.SubresourceRange(aspect_mask, dst_is_3d),
},
};
cmdbuf.PipelineBarrier(
@ -1691,10 +1712,12 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
const VkBuffer src_buffer = buffer;
const VkImage temp_vk_image = *temp_wrapper->original_image;
const VkImageAspectFlags vk_aspect_mask = temp_wrapper->aspect_mask;
const bool temp_is_3d = temp_info.type == ImageType::e3D;
scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies,
scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, temp_is_3d, vk_copies,
keep = temp_wrapper](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, VideoCommon::FixSmallVectorADL(vk_copies));
CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false,
temp_is_3d, VideoCommon::FixSmallVectorADL(vk_copies));
});
// Use MSAACopyPass to convert from non-MSAA to MSAA
@ -1730,10 +1753,12 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool was_initialized = std::exchange(initialized, true);
const bool is_3d_image = info.type == ImageType::e3D;
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, was_initialized,
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, was_initialized, is_3d_image,
vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, was_initialized, VideoCommon::FixSmallVectorADL(vk_copies));
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, was_initialized,
is_3d_image, VideoCommon::FixSmallVectorADL(vk_copies));
});
if (is_rescaled) {

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@ -22,16 +22,13 @@
#include <vulkan/vulkan.h>
// Define maintenance 7-9 extension names (not yet in official Vulkan headers)
// Define maintenance 7-8 extension names
#ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7"
#endif
#ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
#endif
#ifndef VK_KHR_MAINTENANCE_9_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_9_EXTENSION_NAME "VK_KHR_maintenance9"
#endif
// Sanitize macros
#undef CreateEvent

View file

@ -1286,10 +1286,6 @@ void Device::RemoveUnsuitableExtensions() {
// VK_KHR_maintenance8
extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
// VK_KHR_maintenance9
extensions.maintenance9 = loaded_extensions.contains(VK_KHR_MAINTENANCE_9_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance9, VK_KHR_MAINTENANCE_9_EXTENSION_NAME);
}
void Device::SetupFamilies(VkSurfaceKHR surface) {

View file

@ -97,7 +97,6 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
EXTENSION(KHR, MAINTENANCE_7, maintenance7) \
EXTENSION(KHR, MAINTENANCE_8, maintenance8) \
EXTENSION(KHR, MAINTENANCE_9, maintenance9) \
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
@ -884,11 +883,6 @@ public:
return extensions.maintenance8;
}
/// Returns true if the device supports VK_KHR_maintenance9.
bool IsKhrMaintenance9Supported() const {
return extensions.maintenance9;
}
/// Returns true if the device supports UINT8 index buffer conversion via compute shader.
bool SupportsUint8Indices() const {
return features.bit8_storage.storageBuffer8BitAccess &&