diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/fetcher/SpacingItemDecoration.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/fetcher/SpacingItemDecoration.kt
index f3d000a739..b3ffcc2a35 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/fetcher/SpacingItemDecoration.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/fetcher/SpacingItemDecoration.kt
@@ -1,10 +1,11 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
package org.yuzu.yuzu_emu.features.fetcher
import android.graphics.Rect
import android.view.View
+import androidx.recyclerview.widget.GridLayoutManager
import androidx.recyclerview.widget.RecyclerView
class SpacingItemDecoration(private val spacing: Int) : RecyclerView.ItemDecoration() {
@@ -15,8 +16,20 @@ class SpacingItemDecoration(private val spacing: Int) : RecyclerView.ItemDecorat
state: RecyclerView.State
) {
outRect.bottom = spacing
- if (parent.getChildAdapterPosition(view) == 0) {
+
+ val position = parent.getChildAdapterPosition(view)
+ if (position == RecyclerView.NO_POSITION) return
+
+ if (position == 0) {
outRect.top = spacing
+ return
+ }
+
+ // If the item is in the first row, but NOT in first column add top spacing as well
+ val layoutManager = parent.layoutManager
+ if (layoutManager is GridLayoutManager && layoutManager.spanSizeLookup.getSpanGroupIndex(position, layoutManager.spanCount) == 0) {
+ outRect.top = spacing
+ return
}
}
}
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
index 2418003904..7b98fe9b44 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
@@ -30,8 +30,6 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
BUFFER_REORDER_DISABLE("disable_buffer_reorder"),
RENDERER_DEBUG("debug"),
RENDERER_PATCH_OLD_QCOM_DRIVERS("patch_old_qcom_drivers"),
- RENDERER_VERTEX_INPUT_DYNAMIC_STATE("vertex_input_dynamic_state"),
- RENDERER_PROVOKING_VERTEX("provoking_vertex"),
RENDERER_DESCRIPTOR_INDEXING("descriptor_indexing"),
RENDERER_SAMPLE_SHADING("sample_shading"),
GPU_UNSWIZZLE_ENABLED("gpu_unswizzle_enabled"),
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
index a8bd44983b..cabea73353 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
@@ -141,20 +141,6 @@ abstract class SettingsItem(
valuesId = R.array.dynaStateValues
)
)
- put(
- SwitchSetting(
- BooleanSetting.RENDERER_PROVOKING_VERTEX,
- titleId = R.string.provoking_vertex,
- descriptionId = R.string.provoking_vertex_description
- )
- )
- put(
- SwitchSetting(
- BooleanSetting.RENDERER_VERTEX_INPUT_DYNAMIC_STATE,
- titleId = R.string.vertex_input_dynamic_state,
- descriptionId = R.string.vertex_input_dynamic_state_description
- )
- )
put(
SwitchSetting(
BooleanSetting.RENDERER_DESCRIPTOR_INDEXING,
@@ -349,15 +335,6 @@ abstract class SettingsItem(
valuesId = R.array.astcDecodingMethodValues
)
)
- put(
- SingleChoiceSetting(
- IntSetting.RENDERER_ASTC_RECOMPRESSION,
- titleId = R.string.astc_recompression,
- descriptionId = R.string.astc_recompression_description,
- choicesId = R.array.astcRecompressionMethodNames,
- valuesId = R.array.astcRecompressionMethodValues
- )
- )
put(
SingleChoiceSetting(
IntSetting.RENDERER_VRAM_USAGE_MODE,
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
index 77104e0614..c43de4d5c7 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
@@ -271,7 +271,6 @@ class SettingsFragmentPresenter(
add(IntSetting.MAX_ANISOTROPY.key)
add(IntSetting.RENDERER_VRAM_USAGE_MODE.key)
add(IntSetting.RENDERER_ASTC_DECODE_METHOD.key)
- add(IntSetting.RENDERER_ASTC_RECOMPRESSION.key)
add(BooleanSetting.SYNC_MEMORY_OPERATIONS.key)
add(BooleanSetting.RENDERER_USE_DISK_SHADER_CACHE.key)
@@ -291,8 +290,6 @@ class SettingsFragmentPresenter(
add(HeaderSetting(R.string.extensions))
add(IntSetting.RENDERER_DYNA_STATE.key)
- add(BooleanSetting.RENDERER_VERTEX_INPUT_DYNAMIC_STATE.key)
- add(BooleanSetting.RENDERER_PROVOKING_VERTEX.key)
add(BooleanSetting.RENDERER_DESCRIPTOR_INDEXING.key)
add(IntSetting.RENDERER_SAMPLE_SHADING.key)
diff --git a/src/android/app/src/main/res/values-ar/strings.xml b/src/android/app/src/main/res/values-ar/strings.xml
index 05f65ecf78..7f3982f49b 100644
--- a/src/android/app/src/main/res/values-ar/strings.xml
+++ b/src/android/app/src/main/res/values-ar/strings.xml
@@ -506,8 +506,6 @@
الحالة الديناميكية الموسعة
يتحكم هذا الخيار في عدد الميزات التي يمكن استخدامها في حالة الديناميكية الموسعة. تسمح الأرقام الأعلى بمزيد من الميزات ويمكن أن تزيد من الأداء، ولكنها قد تسبب مشاكل مع بعض برامج التشغيل والأجهزة.
معطل
- حالة ديناميكية لإدخال الرأس
- يتيح ميزة الحالة الديناميكية لإدخال الرأس لتحسين الجودة والأداء.
الرأس المثير
يحسن الإضاءة ومعالجة الرؤوس في بعض الألعاب. مدعوم فقط على وحدات معالجة الرسومات Vulkan 1.0+.
فهرسة الوصف
diff --git a/src/android/app/src/main/res/values-cs/strings.xml b/src/android/app/src/main/res/values-cs/strings.xml
index b56c21c9b0..c3ee5e6f53 100644
--- a/src/android/app/src/main/res/values-cs/strings.xml
+++ b/src/android/app/src/main/res/values-cs/strings.xml
@@ -488,8 +488,6 @@
Úroveň EDS
Určuje počet funkcí využívaných v rámci rozšířeného dynamického stavu API Vulkan (Extended Dynamic State). Vyšší hodnoty umožňují využít více funkcí a mohou zvýšit výkon, ale u některých ovladačů a výrobců grafických karet mohou způsobovat problémy s kompatibilitou.
Vypnuto
- Dynamický stav vstupu vrcholů (Vertex Input)
- Aktivuje funkci dynamického stavu vstupu vrcholů (Vertex Input Dynamic State) pro lepší kvalitu a výkon.
Určující vrchol
Zlepšuje osvětlení a zpracování vrcholů v některých hrách. Podporováno pouze na GPU s API Vulkan 1.0+.
Indexování deskriptorů
diff --git a/src/android/app/src/main/res/values-de/strings.xml b/src/android/app/src/main/res/values-de/strings.xml
index 7524402e6e..5c499a4080 100644
--- a/src/android/app/src/main/res/values-de/strings.xml
+++ b/src/android/app/src/main/res/values-de/strings.xml
@@ -486,8 +486,6 @@ Wird der Handheld-Modus verwendet, verringert es die Auflösung und erhöht die
Erweiterter dynamischer Status
Steuert die Anzahl der Funktionen, die im \"Vertex Input Dynamic State\" werden können. Höhere Werte ermöglichen mehr Funktionen und können die Leistung steigern, können aber bei einigen Treibern und Anbietern zu Problemen führen.
Deaktiviert
- Vertex Input Dynamic State
- Aktiviert die Funktion \"Vertex Input Dynamic State\" für bessere Qualität und Leistung.
Provokanter Vertex
Verbessert die Beleuchtung und die Vertex-Verarbeitung in einigen Spielen. Wird nur von GPUs mit Vulkan 1.0+ unterstützt.
Deskriptor-Indizierung
diff --git a/src/android/app/src/main/res/values-fr/strings.xml b/src/android/app/src/main/res/values-fr/strings.xml
index add275870d..4294e6d81e 100644
--- a/src/android/app/src/main/res/values-fr/strings.xml
+++ b/src/android/app/src/main/res/values-fr/strings.xml
@@ -436,8 +436,6 @@
Compile les shaders de manière asynchrone. Cela peut réduire les saccades mais peut aussi provoquer des problèmes graphiques.
État dynamique étendu
Désactivé
- État dynamique d\'entrée de sommet
- Active la fonctionnalité d\'état dynamique des entrées de sommets pour une meilleure qualité et de meilleures performances.
Provoque des Vertex
Améliore l`éclairage et la gestion des vertex dans certains jeux. Pris en charge uniquement par les GPU Vulkan 1.0+.
Indexation des descripteurs
diff --git a/src/android/app/src/main/res/values-pl/strings.xml b/src/android/app/src/main/res/values-pl/strings.xml
index 3d69cce8f3..6954b65fe1 100644
--- a/src/android/app/src/main/res/values-pl/strings.xml
+++ b/src/android/app/src/main/res/values-pl/strings.xml
@@ -488,8 +488,6 @@
Rozszerzony stan dynamiczny
Kontroluje liczbę funkcji, które mogą być używane w Extended Dynamic State. Wyższe wartości pozwalają na użycie większej liczby funkcji i mogą zwiększyć wydajność, ale mogą powodować problemy z niektórymi sterownikami i u niektórych producentów.
Wyłączone
- Dynamiczny stan wejścia wierzchołków
- Włącza funkcję dynamicznego stanu wejścia wierzchołków, poprawiając jakość i wydajność.
Wierzchołek prowokujący
Poprawia oświetlenie i obsługę wierzchołków w niektórych grach. Obsługiwane tylko przez GPU Vulkan 1.0+.
Indeksowanie deskryptorów
diff --git a/src/android/app/src/main/res/values-pt-rBR/strings.xml b/src/android/app/src/main/res/values-pt-rBR/strings.xml
index 08e2695d2e..5d1843fdae 100644
--- a/src/android/app/src/main/res/values-pt-rBR/strings.xml
+++ b/src/android/app/src/main/res/values-pt-rBR/strings.xml
@@ -471,8 +471,6 @@
Compila shaders de forma assíncrona. Isso pode reduzir engasgos, mas também pode introduzir falhas gráficas.
Extended Dynamic State
Desativado
- Vertex Input Dynamic State
- Ativa o recurso de vertex input dynamic state para melhor qualidade e desempenho.
Provoking Vertex
Vértice Provocante: Melhora a iluminação e o processamento de vértices em certos jogos. Suportado apenas em GPUs com Vulkan 1.0 ou superior.
Descriptor Indexing
diff --git a/src/android/app/src/main/res/values-ru/strings.xml b/src/android/app/src/main/res/values-ru/strings.xml
index a9a3cceaae..de2bcea0f0 100644
--- a/src/android/app/src/main/res/values-ru/strings.xml
+++ b/src/android/app/src/main/res/values-ru/strings.xml
@@ -498,8 +498,6 @@
Расширенное динамическое состояние
Управляет количеством функций, доступных в режиме «Расширенное динамическое состояние». Большее число позволяет задействовать больше функций и может повысить производительность, но способно вызывать проблемы с некоторыми драйверами и графикой.
Отключено
- Динамическое состояние ввода вершин
- Включает функцию динамического состояния ввода вершин для повышения качества и производительности
Определяющая вершина
Улучшает освещение и обработку вершин в некоторых играх. Поддерживается только ГПУ с Vulkan 1.0+.
Индексирование дескрипторов
diff --git a/src/android/app/src/main/res/values-uk/strings.xml b/src/android/app/src/main/res/values-uk/strings.xml
index ada2445d05..343d38103f 100644
--- a/src/android/app/src/main/res/values-uk/strings.xml
+++ b/src/android/app/src/main/res/values-uk/strings.xml
@@ -502,8 +502,6 @@
Розширений динамічний стан
Керує кількістю функцій, які можна використовувати в «Розширеному динамічному стані». Вище число дозволяє більше функцій і може покращити продуктивність, але може спричинити проблеми з деякими драйверами й виробниками.
Вимкнено
- Динамічний стан введення вершин
- Вмикає можливість динамічного стану введення вершин для кращих якості й продуктивності.
Провокативна вершина
Покращує освітлення та взаємодію з вершинами у деяких іграх. Лише для ГП з підтримкою Vulkan 1.0+.
Індексація дескрипторів
diff --git a/src/android/app/src/main/res/values-zh-rCN/strings.xml b/src/android/app/src/main/res/values-zh-rCN/strings.xml
index 08b55297a7..b1da5135dc 100644
--- a/src/android/app/src/main/res/values-zh-rCN/strings.xml
+++ b/src/android/app/src/main/res/values-zh-rCN/strings.xml
@@ -496,8 +496,6 @@
扩展动态状态
控制在扩展动态状态中可使用的函数数量。更高的数值允许启用更多功能,并可能提升性能,但同时也可能导致额外的图形问题。
已禁用
- 顶点输入动态状态
- 开启顶点输入动态状态功能来获得更好的质量和性能。
引发顶点
改善某些游戏中的光照和顶点处理。仅支持Vulkan 1.0+ GPU。
描述符索引
diff --git a/src/android/app/src/main/res/values-zh-rTW/strings.xml b/src/android/app/src/main/res/values-zh-rTW/strings.xml
index c7061ebc03..b593f97575 100644
--- a/src/android/app/src/main/res/values-zh-rTW/strings.xml
+++ b/src/android/app/src/main/res/values-zh-rTW/strings.xml
@@ -467,8 +467,6 @@
非同步編譯著色器。這可能會減少卡頓,但也可能導致圖形錯誤。
擴展動態狀態
已停用
- 頂點輸入動態狀態
- 啟用頂點輸入動態狀態以取得更佳的品質及性能
引發頂點
改善某些遊戲中的光照和頂點處理。僅支援Vulkan 1.0+ GPU。
描述符索引
diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml
index 565decb390..31709eb89b 100644
--- a/src/android/app/src/main/res/values/arrays.xml
+++ b/src/android/app/src/main/res/values/arrays.xml
@@ -632,14 +632,12 @@
- @string/disabled
- ExtendedDynamicState 1
- ExtendedDynamicState 2
- - ExtendedDynamicState 3
- 0
- 1
- 2
- - 3
diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml
index 7d094effcb..de74b6c2eb 100644
--- a/src/android/app/src/main/res/values/strings.xml
+++ b/src/android/app/src/main/res/values/strings.xml
@@ -531,8 +531,6 @@
Extended Dynamic State
Controls the number of features that can be used in Extended Dynamic State. Higher numbers allow for more features and can increase performance, but may cause issues with some drivers and vendors.
Disabled
- Vertex Input Dynamic State
- Enables vertex input dynamic state feature for better quality and performance.
Provoking Vertex
Improves lighting and vertex handling in certain games. Only supported on Vulkan 1.0+ GPUs.
Descriptor Indexing
diff --git a/src/common/settings.h b/src/common/settings.h
index 7c6c0d062f..ff32ee42c3 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -453,7 +453,7 @@ struct Values {
Category::RendererAdvanced};
SwitchableSetting accelerate_astc{linkage,
#ifdef ANDROID
- AstcDecodeMode::Cpu,
+ AstcDecodeMode::Gpu,
#else
AstcDecodeMode::Gpu,
#endif
@@ -586,7 +586,7 @@ struct Values {
SwitchableSetting dyna_state{linkage,
#if defined (ANDROID) || defined (__APPLE__)
- ExtendedDynamicState::Disabled,
+ ExtendedDynamicState::EDS1,
#else
ExtendedDynamicState::EDS2,
#endif
@@ -601,14 +601,6 @@ struct Values {
Category::RendererExtensions,
Specialization::Scalar};
- SwitchableSetting vertex_input_dynamic_state{linkage,
-#if defined (ANDROID)
- false,
-#else
- true,
-#endif
- "vertex_input_dynamic_state", Category::RendererExtensions};
- SwitchableSetting provoking_vertex{linkage, false, "provoking_vertex", Category::RendererExtensions};
SwitchableSetting descriptor_indexing{linkage, false, "descriptor_indexing", Category::RendererExtensions};
Setting renderer_debug{linkage, false, "debug", Category::RendererDebug};
diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h
index 638be4127f..5de0641b69 100644
--- a/src/common/settings_enums.h
+++ b/src/common/settings_enums.h
@@ -154,7 +154,7 @@ ENUM(GpuUnswizzleSize, VerySmall, Small, Normal, Large, VeryLarge)
ENUM(GpuUnswizzle, VeryLow, Low, Normal, Medium, High)
ENUM(GpuUnswizzleChunk, VeryLow, Low, Normal, Medium, High)
ENUM(TemperatureUnits, Celsius, Fahrenheit)
-ENUM(ExtendedDynamicState, Disabled, EDS1, EDS2, EDS3);
+ENUM(ExtendedDynamicState, Disabled, EDS1, EDS2);
ENUM(GpuLogLevel, Off, Errors, Standard, Verbose, All)
ENUM(GameListMode, TreeView, GridView);
ENUM(SpeedMode, Standard, Turbo, Slow);
diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp
index 4fc59d0e10..bfc5539903 100644
--- a/src/core/hle/service/prepo/prepo.cpp
+++ b/src/core/hle/service/prepo/prepo.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
@@ -28,8 +28,10 @@ public:
{10101, &PlayReport::SaveReportWithUser, "SaveReportWithUserOld"},
{10102, &PlayReport::SaveReport, "SaveReportOld2"},
{10103, &PlayReport::SaveReportWithUser, "SaveReportWithUserOld2"},
- {10104, &PlayReport::SaveReport, "SaveReport"},
- {10105, &PlayReport::SaveReportWithUser, "SaveReportWithUser"},
+ {10104, &PlayReport::SaveReport, "SaveReportOld3"},
+ {10105, &PlayReport::SaveReportWithUser, "SaveReportWithUserOld3"},
+ {10106, &PlayReport::SaveReport, "SaveReport"},
+ {10107, &PlayReport::SaveReportWithUser, "SaveReportWithUser"},
{10200, &PlayReport::RequestImmediateTransmission, "RequestImmediateTransmission"},
{10300, &PlayReport::GetTransmissionStatus, "GetTransmissionStatus"},
{10400, &PlayReport::GetSystemSessionId, "GetSystemSessionId"},
diff --git a/src/core/reporter.h b/src/core/reporter.h
index db1ca3ba0c..1eee8da31f 100644
--- a/src/core/reporter.h
+++ b/src/core/reporter.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -53,6 +56,7 @@ public:
enum class PlayReportType {
Old,
Old2,
+ Old3,
New,
System,
};
diff --git a/src/qt_common/config/shared_translation.cpp b/src/qt_common/config/shared_translation.cpp
index f49c43ee2a..bf312a183e 100644
--- a/src/qt_common/config/shared_translation.cpp
+++ b/src/qt_common/config/shared_translation.cpp
@@ -368,17 +368,6 @@ std::unique_ptr InitializeTranslations(QObject* parent)
"Higher states allow for more features and can increase performance, but may cause "
"additional graphical issues."));
- INSERT(Settings,
- vertex_input_dynamic_state,
- tr("Vertex Input Dynamic State"),
- tr("Enables vertex input dynamic state feature for better quality and performance."));
-
- INSERT(Settings,
- provoking_vertex,
- tr("Provoking Vertex"),
- tr("Improves lighting and vertex handling in some games.\n"
- "Only Vulkan 1.0+ devices support this extension."));
-
INSERT(Settings,
descriptor_indexing,
tr("Descriptor Indexing"),
@@ -425,6 +414,9 @@ std::unique_ptr InitializeTranslations(QObject* parent)
"their resolution, details and supported controllers and depending on this setting.\n"
"Setting to Handheld can help improve performance for low end systems."));
INSERT(Settings, current_user, QString(), QString());
+ INSERT(Settings, serial_unit, tr("Unit Serial"), QString());
+ INSERT(Settings, serial_battery, tr("Battery Serial"), QString());
+ INSERT(Settings, debug_knobs, tr("Debug knobs"), QString());
// Controls
@@ -796,7 +788,6 @@ std::unique_ptr ComboboxEnumeration(QObject* parent)
PAIR(ExtendedDynamicState, Disabled, tr("Disabled")),
PAIR(ExtendedDynamicState, EDS1, tr("ExtendedDynamicState 1")),
PAIR(ExtendedDynamicState, EDS2, tr("ExtendedDynamicState 2")),
- PAIR(ExtendedDynamicState, EDS3, tr("ExtendedDynamicState 3")),
}});
translations->insert({Settings::EnumMetadata::Index(),
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 313a1deb30..97124c0bfa 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
@@ -11,15 +11,159 @@
#include
#include
+#include "common/logging/log.h"
#include "common/settings.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/program.h"
namespace Shader::Backend::SPIRV {
namespace {
+[[nodiscard]] constexpr std::string_view StageName(Stage stage) noexcept {
+ switch (stage) {
+ case Stage::VertexA:
+ return "VertexA";
+ case Stage::VertexB:
+ return "VertexB";
+ case Stage::TessellationControl:
+ return "TessellationControl";
+ case Stage::TessellationEval:
+ return "TessellationEval";
+ case Stage::Geometry:
+ return "Geometry";
+ case Stage::Fragment:
+ return "Fragment";
+ case Stage::Compute:
+ return "Compute";
+ }
+ return "Unknown";
+}
+
+[[nodiscard]] constexpr std::string_view DenormModeName(bool flush, bool preserve) noexcept {
+ if (flush && preserve) {
+ return "Flush+Preserve";
+ }
+ if (flush) {
+ return "Flush";
+ }
+ if (preserve) {
+ return "Preserve";
+ }
+ return "None";
+}
+
+[[nodiscard]] constexpr bool IsFp32RoundingRelevantOpcode(IR::Opcode opcode) noexcept {
+ switch (opcode) {
+ case IR::Opcode::FPAdd32:
+ case IR::Opcode::FPFma32:
+ case IR::Opcode::FPMul32:
+ case IR::Opcode::FPRoundEven32:
+ case IR::Opcode::FPFloor32:
+ case IR::Opcode::FPCeil32:
+ case IR::Opcode::FPTrunc32:
+ case IR::Opcode::FPOrdEqual32:
+ case IR::Opcode::FPUnordEqual32:
+ case IR::Opcode::FPOrdNotEqual32:
+ case IR::Opcode::FPUnordNotEqual32:
+ case IR::Opcode::FPOrdLessThan32:
+ case IR::Opcode::FPUnordLessThan32:
+ case IR::Opcode::FPOrdGreaterThan32:
+ case IR::Opcode::FPUnordGreaterThan32:
+ case IR::Opcode::FPOrdLessThanEqual32:
+ case IR::Opcode::FPUnordLessThanEqual32:
+ case IR::Opcode::FPOrdGreaterThanEqual32:
+ case IR::Opcode::FPUnordGreaterThanEqual32:
+ case IR::Opcode::ConvertF16F32:
+ case IR::Opcode::ConvertF64F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct Fp32RoundingUsage {
+ u32 rz_count{};
+ bool has_conflicting_rounding{};
+};
+
+Fp32RoundingUsage CollectFp32RoundingUsage(const IR::Program& program) {
+ Fp32RoundingUsage usage{};
+ for (const IR::Block* const block : program.post_order_blocks) {
+ for (const IR::Inst& inst : block->Instructions()) {
+ if (!IsFp32RoundingRelevantOpcode(inst.GetOpcode())) {
+ continue;
+ }
+ switch (inst.Flags().rounding) {
+ case IR::FpRounding::RZ:
+ ++usage.rz_count;
+ break;
+ case IR::FpRounding::RN:
+ case IR::FpRounding::RM:
+ case IR::FpRounding::RP:
+ usage.has_conflicting_rounding = true;
+ break;
+ case IR::FpRounding::DontCare:
+ break;
+ }
+ }
+ }
+ return usage;
+}
+
+void LogRzBackendSummary(const Profile& profile, const IR::Program& program, bool optimize) {
+ if (!Settings::values.renderer_debug) {
+ return;
+ }
+ const Fp32RoundingUsage usage{CollectFp32RoundingUsage(program)};
+ if (usage.rz_count == 0) {
+ return;
+ }
+
+ LOG_INFO(Shader_SPIRV,
+ "SPV_RZ {} start={:#010x} optimize={} support_float_controls={} separate_denorm_behavior={} separate_rounding_mode={} support_fp32_rounding_rtz={} broken_fp16_float_controls={} fp16_denorm={} fp32_denorm={} signed_nan16={} signed_nan32={} signed_nan64={} rz_inst_count={} mixed_fp32_rounding={}",
+ StageName(program.stage), program.start_address, optimize,
+ profile.support_float_controls, profile.support_separate_denorm_behavior,
+ profile.support_separate_rounding_mode, profile.support_fp32_rounding_rtz,
+ profile.has_broken_fp16_float_controls,
+ DenormModeName(program.info.uses_fp16_denorms_flush,
+ program.info.uses_fp16_denorms_preserve),
+ DenormModeName(program.info.uses_fp32_denorms_flush,
+ program.info.uses_fp32_denorms_preserve),
+ profile.support_fp16_signed_zero_nan_preserve,
+ profile.support_fp32_signed_zero_nan_preserve,
+ profile.support_fp64_signed_zero_nan_preserve, usage.rz_count,
+ usage.has_conflicting_rounding);
+}
+
+void SetupRoundingControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
+ Id main_func) {
+ const Fp32RoundingUsage usage{CollectFp32RoundingUsage(program)};
+ if (usage.rz_count == 0) {
+ return;
+ }
+ if (usage.has_conflicting_rounding) {
+ if (Settings::values.renderer_debug) {
+ LOG_INFO(Shader_SPIRV,
+ "SPV_RZ {} start={:#010x} skipping_fp32_rtz_execution_mode reason=mixed_rounding",
+ StageName(program.stage), program.start_address);
+ }
+ return;
+ }
+ if (!profile.support_fp32_rounding_rtz) {
+ if (Settings::values.renderer_debug) {
+ LOG_INFO(Shader_SPIRV,
+ "SPV_RZ {} start={:#010x} skipping_fp32_rtz_execution_mode reason=unsupported_fp32_rtz",
+ StageName(program.stage), program.start_address);
+ }
+ return;
+ }
+ ctx.AddCapability(spv::Capability::RoundingModeRTZ);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTZ, 32U);
+}
+
template
struct FuncTraits {};
thread_local std::unique_ptr thread_optimizer;
@@ -503,12 +647,14 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
IR::Program& program, Bindings& bindings, bool optimize) {
+ LogRzBackendSummary(profile, program, optimize);
EmitContext ctx{profile, runtime_info, program, bindings};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
if (profile.support_float_controls) {
ctx.AddExtension("SPV_KHR_float_controls");
SetupDenormControl(profile, program, ctx, main);
+ SetupRoundingControl(profile, program, ctx, main);
SetupSignedNanCapabilities(profile, program, ctx, main);
}
SetupCapabilities(profile, program.info, ctx);
@@ -516,6 +662,12 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
PatchPhiNodes(program, ctx);
if (!optimize) {
+ if (Settings::values.renderer_debug && ctx.log_rz_fp_controls) {
+ const std::vector spirv{ctx.Assemble()};
+ LOG_INFO(Shader_SPIRV, "SPV_RZ {} start={:#010x} assembled_words={} optimized_words={} validator_run=false",
+ StageName(program.stage), program.start_address, spirv.size(), spirv.size());
+ return spirv;
+ }
return ctx.Assemble();
} else {
std::vector spirv = ctx.Assemble();
@@ -535,6 +687,11 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
"Failed to optimize SPIRV shader output, continuing without optimization");
result = std::move(spirv);
}
+ if (Settings::values.renderer_debug && ctx.log_rz_fp_controls) {
+ LOG_INFO(Shader_SPIRV,
+ "SPV_RZ {} start={:#010x} assembled_words={} optimized_words={} validator_run=false",
+ StageName(program.stage), program.start_address, spirv.size(), result.size());
+ }
return result;
}
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index db11def7b2..beab29ec8a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -491,6 +491,9 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
}
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
+ if (!ctx.runtime_info.active_color_outputs[index]) {
+ return;
+ }
const Id component_id{ctx.Const(component)};
const AttributeType type{ctx.runtime_info.color_output_types[index]};
if (type == AttributeType::Float) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index d921913b4a..1957c26df9 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -1,16 +1,60 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "common/logging/log.h"
+#include "common/settings.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
namespace Shader::Backend::SPIRV {
namespace {
+[[nodiscard]] constexpr std::string_view StageName(Stage stage) noexcept {
+ switch (stage) {
+ case Stage::VertexA:
+ return "VertexA";
+ case Stage::VertexB:
+ return "VertexB";
+ case Stage::TessellationControl:
+ return "TessellationControl";
+ case Stage::TessellationEval:
+ return "TessellationEval";
+ case Stage::Geometry:
+ return "Geometry";
+ case Stage::Fragment:
+ return "Fragment";
+ case Stage::Compute:
+ return "Compute";
+ }
+ return "Unknown";
+}
+
+[[nodiscard]] constexpr std::string_view FmzName(IR::FmzMode fmz_mode) noexcept {
+ switch (fmz_mode) {
+ case IR::FmzMode::DontCare:
+ return "DontCare";
+ case IR::FmzMode::FTZ:
+ return "FTZ";
+ case IR::FmzMode::FMZ:
+ return "FMZ";
+ case IR::FmzMode::None:
+ return "None";
+ }
+ return "Unknown";
+}
+
Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
const auto flags{inst->Flags()};
+ if (Settings::values.renderer_debug && ctx.log_rz_fp_controls &&
+ flags.rounding == IR::FpRounding::RZ) {
+ LOG_INFO(Shader_SPIRV,
+ "SPV_RZ_EMIT {} start={:#010x} ir_opcode={} spirv_op=OpFMul no_contraction={} fmz={} float_controls_ext={}",
+ StageName(ctx.stage), ctx.start_address, inst->GetOpcode(),
+ flags.no_contraction, FmzName(flags.fmz_mode), ctx.profile.support_float_controls);
+ }
if (flags.no_contraction) {
ctx.Decorate(op, spv::Decoration::NoContraction);
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 4bff810547..2fd0f3bd1a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -14,6 +14,25 @@
namespace Shader::Backend::SPIRV {
namespace {
+Id GetResultType(EmitContext& ctx, NumericType numeric_type) {
+ switch (numeric_type) {
+ case NumericType::Float:
+ return ctx.F32[4];
+ case NumericType::SignedInt:
+ return ctx.S32[4];
+ case NumericType::UnsignedInt:
+ return ctx.U32[4];
+ }
+ throw LogicError("Invalid numeric type {}", static_cast(numeric_type));
+}
+
+NumericType GetTextureNumericType(EmitContext& ctx, const IR::TextureInstInfo& info) {
+ if (info.type == TextureType::Buffer) {
+ return ctx.texture_buffers.at(info.descriptor_index).numeric_type;
+ }
+ return ctx.textures.at(info.descriptor_index).numeric_type;
+}
+
class ImageOperands {
public:
[[maybe_unused]] static constexpr bool ImageSampleOffsetAllowed = false;
@@ -201,10 +220,10 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind
const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)};
if (def.count > 1) {
const Id idx{index.IsImmediate() ? ctx.Const(index.U32()) : ctx.Def(index)};
- const Id ptr{ctx.OpAccessChain(ctx.image_buffer_type, def.id, idx)};
- return ctx.OpLoad(ctx.image_buffer_type, ptr);
+ const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx)};
+ return ctx.OpLoad(def.image_type, ptr);
}
- return ctx.OpLoad(ctx.image_buffer_type, def.id);
+ return ctx.OpLoad(def.image_type, def.id);
} else {
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
if (def.count > 1) {
@@ -216,23 +235,24 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind
}
}
-std::pair Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+std::pair Image(EmitContext& ctx, const IR::Value& index,
+ IR::TextureInstInfo info) {
if (info.type == TextureType::Buffer) {
const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)};
if (def.count > 1) {
const Id idx{index.IsImmediate() ? ctx.Const(index.U32()) : ctx.Def(index)};
const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx)};
- return {ctx.OpLoad(def.image_type, ptr), def.is_integer};
+ return {ctx.OpLoad(def.image_type, ptr), def.numeric_type};
}
- return {ctx.OpLoad(def.image_type, def.id), def.is_integer};
+ return {ctx.OpLoad(def.image_type, def.id), def.numeric_type};
} else {
const ImageDefinition def{ctx.images.at(info.descriptor_index)};
if (def.count > 1) {
const Id idx{index.IsImmediate() ? ctx.Const(index.U32()) : ctx.Def(index)};
const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx)};
- return {ctx.OpLoad(def.image_type, ptr), def.is_integer};
+ return {ctx.OpLoad(def.image_type, ptr), def.numeric_type};
}
- return {ctx.OpLoad(def.image_type, def.id), def.is_integer};
+ return {ctx.OpLoad(def.image_type, def.id), def.numeric_type};
}
}
@@ -461,8 +481,9 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
if (ctx.stage == Stage::Fragment) {
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, offset);
+ const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))};
return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
- &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
+ &EmitContext::OpImageSampleImplicitLod, ctx, inst, result_type,
Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
} else {
// We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as
@@ -470,8 +491,9 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
// derivatives
const Id lod{ctx.Const(0.0f)};
const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
+ const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))};
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
- &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
}
}
@@ -480,12 +502,14 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
Id lod, const IR::Value& offset) {
const auto info{inst->Flags()};
const ImageOperands operands(ctx, false, true, false, lod, offset);
+ const NumericType numeric_type{GetTextureNumericType(ctx, info)};
+ const Id result_type{GetResultType(ctx, numeric_type)};
Id result = Emit(&EmitContext::OpImageSparseSampleExplicitLod,
- &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
#ifdef ANDROID
- if (Settings::values.fix_bloom_effects.GetValue()) {
+ if (numeric_type == NumericType::Float && Settings::values.fix_bloom_effects.GetValue()) {
result = ctx.OpVectorTimesScalar(ctx.F32[4], result, ctx.Const(0.98f));
}
#endif
@@ -529,8 +553,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
}
+ const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))};
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
- ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
+ result_type, Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
operands.MaskOptional(), operands.Span());
}
@@ -558,8 +583,10 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
lod = Id{};
}
const ImageOperands operands(lod, ms);
- return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
- TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
+ const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))};
+ return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst,
+ result_type, TextureImage(ctx, info, index), coords, operands.MaskOptional(),
+ operands.Span());
}
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
@@ -609,8 +636,9 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
ctx.Def(offset), {}, lod_clamp)
: ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
info.num_derivatives, offset, lod_clamp);
+ const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))};
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
- &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type,
Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
}
@@ -620,11 +648,11 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
return ctx.ConstantNull(ctx.U32[4]);
}
- const auto [image, is_integer] = Image(ctx, index, info);
- const Id result_type{is_integer ? ctx.U32[4] : ctx.F32[4]};
+ const auto [image, numeric_type] = Image(ctx, index, info);
+ const Id result_type{GetResultType(ctx, numeric_type)};
Id color{Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst,
result_type, image, coords, std::nullopt, std::span{})};
- if (!is_integer) {
+ if (numeric_type == NumericType::Float) {
color = ctx.OpBitcast(ctx.U32[4], color);
}
return color;
@@ -632,8 +660,8 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
const auto info{inst->Flags()};
- const auto [image, is_integer] = Image(ctx, index, info);
- if (!is_integer) {
+ const auto [image, numeric_type] = Image(ctx, index, info);
+ if (numeric_type == NumericType::Float) {
color = ctx.OpBitcast(ctx.F32[4], color);
}
ctx.OpImageWrite(image, coords, color);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index b9a24496c9..7b422a4a41 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -17,6 +17,7 @@
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
namespace Shader::Backend::SPIRV {
namespace {
@@ -28,9 +29,21 @@ enum class Operation {
FPMax,
};
+Id GetNumericTypeId(EmitContext& ctx, NumericType numeric_type) {
+ switch (numeric_type) {
+ case NumericType::Float:
+ return ctx.F32[1];
+ case NumericType::SignedInt:
+ return ctx.S32[1];
+ case NumericType::UnsignedInt:
+ return ctx.U32[1];
+ }
+ throw InvalidArgument("Invalid numeric type {}", static_cast(numeric_type));
+}
+
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
const spv::ImageFormat format{spv::ImageFormat::Unknown};
- const Id type{ctx.F32[1]};
+ const Id type{GetNumericTypeId(ctx, desc.numeric_type)};
const bool depth{desc.is_depth};
const bool ms{desc.is_multisample};
switch (desc.type) {
@@ -461,7 +474,44 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
IR::Program& program, Bindings& bindings)
: Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_},
- stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index},
+ stage{program.stage}, start_address{program.start_address},
+ log_rz_fp_controls{std::ranges::any_of(program.post_order_blocks, [](const IR::Block* block) {
+ return std::ranges::any_of(block->Instructions(), [](const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::FPAdd16:
+ case IR::Opcode::FPFma16:
+ case IR::Opcode::FPMul16:
+ case IR::Opcode::FPRoundEven16:
+ case IR::Opcode::FPFloor16:
+ case IR::Opcode::FPCeil16:
+ case IR::Opcode::FPTrunc16:
+ case IR::Opcode::FPAdd32:
+ case IR::Opcode::FPFma32:
+ case IR::Opcode::FPMul32:
+ case IR::Opcode::FPRoundEven32:
+ case IR::Opcode::FPFloor32:
+ case IR::Opcode::FPCeil32:
+ case IR::Opcode::FPTrunc32:
+ case IR::Opcode::FPOrdEqual32:
+ case IR::Opcode::FPUnordEqual32:
+ case IR::Opcode::FPOrdNotEqual32:
+ case IR::Opcode::FPUnordNotEqual32:
+ case IR::Opcode::FPOrdLessThan32:
+ case IR::Opcode::FPUnordLessThan32:
+ case IR::Opcode::FPOrdGreaterThan32:
+ case IR::Opcode::FPUnordGreaterThan32:
+ case IR::Opcode::FPOrdLessThanEqual32:
+ case IR::Opcode::FPUnordLessThanEqual32:
+ case IR::Opcode::FPOrdGreaterThanEqual32:
+ case IR::Opcode::FPUnordGreaterThanEqual32:
+ case IR::Opcode::ConvertF16F32:
+ case IR::Opcode::ConvertF64F32:
+ return inst.Flags().rounding == IR::FpRounding::RZ;
+ default:
+ return false;
+ }
+ });
+ })}, texture_rescaling_index{bindings.texture_scaling_index},
image_rescaling_index{bindings.image_scaling_index} {
const bool is_unified{profile.unified_descriptor_binding};
u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
@@ -1304,22 +1354,26 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
if (info.texture_buffer_descriptors.empty()) {
return;
}
- const spv::ImageFormat format{spv::ImageFormat::Unknown};
- image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format);
-
- const Id type{TypePointer(spv::StorageClass::UniformConstant, image_buffer_type)};
texture_buffers.reserve(info.texture_buffer_descriptors.size());
for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) {
if (desc.count != 1) {
throw NotImplementedException("Array of texture buffers");
}
+ const spv::ImageFormat format{spv::ImageFormat::Unknown};
+ const Id image_type{
+ TypeImage(GetNumericTypeId(*this, desc.numeric_type), spv::Dim::Buffer, 0U, false,
+ false, 1, format)};
+ const Id type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, NameOf(stage, desc, "texbuf"));
texture_buffers.push_back({
.id = id,
+ .image_type = image_type,
+ .pointer_type = type,
.count = desc.count,
+ .numeric_type = desc.numeric_type,
});
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(id);
@@ -1332,7 +1386,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
image_buffers.reserve(info.image_buffer_descriptors.size());
for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) {
const spv::ImageFormat format{GetImageFormat(desc.format)};
- const Id sampled_type{desc.is_integer ? U32[1] : F32[1]};
+ const Id sampled_type{GetNumericTypeId(*this, desc.numeric_type)};
const Id image_type{
TypeImage(sampled_type, spv::Dim::Buffer, false, false, false, 2, format)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
@@ -1345,7 +1399,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
.image_type = image_type,
.pointer_type = pointer_type,
.count = desc.count,
- .is_integer = desc.is_integer,
+ .numeric_type = desc.numeric_type,
});
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(id);
@@ -1372,6 +1426,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in
.image_type = image_type,
.count = desc.count,
.is_multisample = desc.is_multisample,
+ .numeric_type = desc.numeric_type,
});
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(id);
@@ -1387,7 +1442,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in
void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_index) {
images.reserve(info.image_descriptors.size());
for (const ImageDescriptor& desc : info.image_descriptors) {
- const Id sampled_type{desc.is_integer ? U32[1] : F32[1]};
+ const Id sampled_type{GetNumericTypeId(*this, desc.numeric_type)};
const Id image_type{ImageType(*this, desc, sampled_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
@@ -1399,7 +1454,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde
.image_type = image_type,
.pointer_type = pointer_type,
.count = desc.count,
- .is_integer = desc.is_integer,
+ .numeric_type = desc.numeric_type,
});
if (profile.supported_spirv >= 0x00010400) {
interfaces.push_back(id);
@@ -1671,8 +1726,10 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
case Stage::Fragment:
for (u32 index = 0; index < 8; ++index) {
const bool need_dual_source = runtime_info.dual_source_blend && index <= 1;
- if (!need_dual_source && !info.stores_frag_color[index] &&
- !profile.need_declared_frag_colors) {
+ const bool should_declare = runtime_info.active_color_outputs[index] &&
+ (info.stores_frag_color[index] ||
+ profile.need_declared_frag_colors);
+ if (!need_dual_source && !should_declare) {
continue;
}
const Id type{GetAttributeType(*this, runtime_info.color_output_types[index])};
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index de56809a98..21151bab38 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -41,11 +41,15 @@ struct TextureDefinition {
Id image_type;
u32 count;
bool is_multisample;
+ NumericType numeric_type;
};
struct TextureBufferDefinition {
Id id;
+ Id image_type;
+ Id pointer_type;
u32 count;
+ NumericType numeric_type;
};
struct ImageBufferDefinition {
@@ -53,7 +57,7 @@ struct ImageBufferDefinition {
Id image_type;
Id pointer_type;
u32 count;
- bool is_integer;
+ NumericType numeric_type;
};
struct ImageDefinition {
@@ -61,7 +65,7 @@ struct ImageDefinition {
Id image_type;
Id pointer_type;
u32 count;
- bool is_integer;
+ NumericType numeric_type;
};
struct UniformDefinitions {
@@ -212,6 +216,8 @@ public:
const Profile& profile;
const RuntimeInfo& runtime_info;
Stage stage{};
+ u32 start_address{};
+ bool log_rz_fp_controls{};
Id void_id{};
Id U1{};
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index 6b4a05c598..1836a18bd3 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -20,6 +23,7 @@ struct Program {
BlockList post_order_blocks;
Info info;
Stage stage{};
+ u32 start_address{};
std::array workgroup_size{};
OutputTopology output_topology{};
u32 output_vertices{};
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index f156192c13..6cca023330 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -5,10 +5,13 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include
+#include
#include
+#include
#include
#include
+#include "common/logging/log.h"
#include "common/settings.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
@@ -22,6 +25,214 @@
namespace Shader::Maxwell {
namespace {
+struct FpControlHistogram {
+ std::array total{};
+ std::array no_contraction{};
+ std::array, 2> rounding{};
+ std::array, 2> fmz{};
+ std::array, 5>, 2> combos{};
+};
+
+[[nodiscard]] constexpr std::string_view StageName(Stage stage) noexcept {
+ switch (stage) {
+ case Stage::VertexA:
+ return "VertexA";
+ case Stage::VertexB:
+ return "VertexB";
+ case Stage::TessellationControl:
+ return "TessellationControl";
+ case Stage::TessellationEval:
+ return "TessellationEval";
+ case Stage::Geometry:
+ return "Geometry";
+ case Stage::Fragment:
+ return "Fragment";
+ case Stage::Compute:
+ return "Compute";
+ }
+ return "Unknown";
+}
+
+[[nodiscard]] constexpr std::string_view RoundingName(IR::FpRounding rounding) noexcept {
+ switch (rounding) {
+ case IR::FpRounding::DontCare:
+ return "DontCare";
+ case IR::FpRounding::RN:
+ return "RN";
+ case IR::FpRounding::RM:
+ return "RM";
+ case IR::FpRounding::RP:
+ return "RP";
+ case IR::FpRounding::RZ:
+ return "RZ";
+ }
+ return "Unknown";
+}
+
+[[nodiscard]] constexpr std::string_view FmzName(IR::FmzMode fmz_mode) noexcept {
+ switch (fmz_mode) {
+ case IR::FmzMode::DontCare:
+ return "DontCare";
+ case IR::FmzMode::FTZ:
+ return "FTZ";
+ case IR::FmzMode::FMZ:
+ return "FMZ";
+ case IR::FmzMode::None:
+ return "None";
+ }
+ return "Unknown";
+}
+
+[[nodiscard]] constexpr std::optional FpControlBucket(const IR::Opcode opcode) noexcept {
+ switch (opcode) {
+ case IR::Opcode::FPAdd16:
+ case IR::Opcode::FPFma16:
+ case IR::Opcode::FPMul16:
+ case IR::Opcode::FPRoundEven16:
+ case IR::Opcode::FPFloor16:
+ case IR::Opcode::FPCeil16:
+ case IR::Opcode::FPTrunc16:
+ return 0;
+ case IR::Opcode::FPAdd32:
+ case IR::Opcode::FPFma32:
+ case IR::Opcode::FPMul32:
+ case IR::Opcode::FPRoundEven32:
+ case IR::Opcode::FPFloor32:
+ case IR::Opcode::FPCeil32:
+ case IR::Opcode::FPTrunc32:
+ case IR::Opcode::FPOrdEqual32:
+ case IR::Opcode::FPUnordEqual32:
+ case IR::Opcode::FPOrdNotEqual32:
+ case IR::Opcode::FPUnordNotEqual32:
+ case IR::Opcode::FPOrdLessThan32:
+ case IR::Opcode::FPUnordLessThan32:
+ case IR::Opcode::FPOrdGreaterThan32:
+ case IR::Opcode::FPUnordGreaterThan32:
+ case IR::Opcode::FPOrdLessThanEqual32:
+ case IR::Opcode::FPUnordLessThanEqual32:
+ case IR::Opcode::FPOrdGreaterThanEqual32:
+ case IR::Opcode::FPUnordGreaterThanEqual32:
+ case IR::Opcode::ConvertF16F32:
+ case IR::Opcode::ConvertF64F32:
+ return 1;
+ default:
+ return std::nullopt;
+ }
+}
+
+FpControlHistogram CollectFpControlHistogram(const IR::Program& program) {
+ FpControlHistogram histogram{};
+ for (const IR::Block* const block : program.post_order_blocks) {
+ for (const IR::Inst& inst : block->Instructions()) {
+ const std::optional bucket{FpControlBucket(inst.GetOpcode())};
+ if (!bucket) {
+ continue;
+ }
+ const auto flags{inst.Flags()};
+ ++histogram.total[*bucket];
+ if (flags.no_contraction) {
+ ++histogram.no_contraction[*bucket];
+ }
+ ++histogram.rounding[*bucket][static_cast(flags.rounding)];
+ ++histogram.fmz[*bucket][static_cast(flags.fmz_mode)];
+ ++histogram.combos[*bucket][static_cast(flags.rounding)]
+ [static_cast(flags.fmz_mode)];
+ }
+ }
+ return histogram;
+}
+
+void LogRzFpControlTrace(Environment& env, const IR::Program& program) {
+ std::array totals{};
+ for (const IR::Block* const block : program.post_order_blocks) {
+ for (const IR::Inst& inst : block->Instructions()) {
+ const std::optional bucket{FpControlBucket(inst.GetOpcode())};
+ if (!bucket) {
+ continue;
+ }
+ const auto flags{inst.Flags()};
+ if (flags.rounding != IR::FpRounding::RZ) {
+ continue;
+ }
+ ++totals[*bucket];
+ }
+ }
+
+ if (totals[0] == 0 && totals[1] == 0) {
+ return;
+ }
+
+ constexpr std::array precision_names{"fp16", "fp32"};
+ LOG_INFO(Shader,
+ "FP_RZ {} shader start={:#010x} blocks={} post_order_blocks={} fp16={} fp32={}",
+ StageName(program.stage), env.StartAddress(), program.blocks.size(),
+ program.post_order_blocks.size(), totals[0], totals[1]);
+
+ for (const IR::Block* const block : program.post_order_blocks) {
+ u32 inst_index{};
+ for (const IR::Inst& inst : block->Instructions()) {
+ const std::optional bucket{FpControlBucket(inst.GetOpcode())};
+ if (!bucket) {
+ ++inst_index;
+ continue;
+ }
+ const auto flags{inst.Flags()};
+ if (flags.rounding != IR::FpRounding::RZ) {
+ ++inst_index;
+ continue;
+ }
+ LOG_INFO(Shader,
+ "FP_RZ {} start={:#010x} block_order={} inst_index={} precision={} opcode={} no_contraction={} fmz={}",
+ StageName(program.stage), env.StartAddress(), block->GetOrder(), inst_index,
+ precision_names[*bucket], inst.GetOpcode(), flags.no_contraction,
+ FmzName(flags.fmz_mode));
+ ++inst_index;
+ }
+ }
+}
+
+void LogFpControlHistogram(const IR::Program& program) {
+ const FpControlHistogram histogram{CollectFpControlHistogram(program)};
+ if (histogram.total[0] == 0 && histogram.total[1] == 0) {
+ return;
+ }
+
+ LOG_INFO(Shader, "FP_HIST {} shader blocks={} post_order_blocks={}",
+ StageName(program.stage), program.blocks.size(), program.post_order_blocks.size());
+
+ constexpr std::array precision_names{"fp16", "fp32"};
+ for (size_t bucket = 0; bucket < precision_names.size(); ++bucket) {
+ if (histogram.total[bucket] == 0) {
+ continue;
+ }
+
+ LOG_INFO(Shader,
+ "FP_HIST {} total={} no_contraction={} rounding[DontCare={}, RN={}, RM={}, RP={}, RZ={}] fmz[DontCare={}, FTZ={}, FMZ={}, None={}]",
+ precision_names[bucket], histogram.total[bucket], histogram.no_contraction[bucket],
+ histogram.rounding[bucket][static_cast(IR::FpRounding::DontCare)],
+ histogram.rounding[bucket][static_cast(IR::FpRounding::RN)],
+ histogram.rounding[bucket][static_cast(IR::FpRounding::RM)],
+ histogram.rounding[bucket][static_cast(IR::FpRounding::RP)],
+ histogram.rounding[bucket][static_cast(IR::FpRounding::RZ)],
+ histogram.fmz[bucket][static_cast(IR::FmzMode::DontCare)],
+ histogram.fmz[bucket][static_cast(IR::FmzMode::FTZ)],
+ histogram.fmz[bucket][static_cast(IR::FmzMode::FMZ)],
+ histogram.fmz[bucket][static_cast(IR::FmzMode::None)]);
+
+ for (size_t rounding = 0; rounding < histogram.combos[bucket].size(); ++rounding) {
+ for (size_t fmz = 0; fmz < histogram.combos[bucket][rounding].size(); ++fmz) {
+ const u32 count{histogram.combos[bucket][rounding][fmz]};
+ if (count == 0) {
+ continue;
+ }
+ LOG_INFO(Shader, "FP_HIST {} combo {} / {} = {}", precision_names[bucket],
+ RoundingName(static_cast(rounding)),
+ FmzName(static_cast(fmz)), count);
+ }
+ }
+ }
+}
+
IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
size_t num_syntax_blocks{};
for (const auto& node : syntax_list) {
@@ -247,6 +458,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool;
constexpr u32 DESCRIPTOR_SIZE = 8;
constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast(std::countr_zero(DESCRIPTOR_SIZE));
+NumericType GetNumericType(TexturePixelFormat format) {
+ const auto pixel_format = static_cast(format);
+ if (!VideoCore::Surface::IsPixelFormatInteger(pixel_format)) {
+ return NumericType::Float;
+ }
+ return VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)
+ ? NumericType::SignedInt
+ : NumericType::UnsignedInt;
+}
+
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::BindlessImageSampleImplicitLod:
@@ -199,11 +210,6 @@ static inline TexturePixelFormat ReadTexturePixelFormatCached(Environment& env,
const ConstBufferAddr& cbuf) {
return env.ReadTexturePixelFormat(GetTextureHandleCached(env, cbuf));
}
-static inline bool IsTexturePixelFormatIntegerCached(Environment& env,
- const ConstBufferAddr& cbuf) {
- return env.IsTexturePixelFormatInteger(GetTextureHandleCached(env, cbuf));
-}
-
std::optional Track(const IR::Value& value, Environment& env);
static inline std::optional TrackCached(const IR::Value& v, Environment& env) {
@@ -430,7 +436,8 @@ public:
u32 Add(const TextureBufferDescriptor& desc) {
return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
- return desc.cbuf_index == existing.cbuf_index &&
+ return desc.numeric_type == existing.numeric_type &&
+ desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
desc.shift_left == existing.shift_left &&
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
@@ -449,13 +456,13 @@ public:
})};
image_buffer_descriptors[index].is_written |= desc.is_written;
image_buffer_descriptors[index].is_read |= desc.is_read;
- image_buffer_descriptors[index].is_integer |= desc.is_integer;
return index;
}
u32 Add(const TextureDescriptor& desc) {
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
+ desc.numeric_type == existing.numeric_type &&
desc.has_secondary == existing.has_secondary &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
@@ -479,7 +486,6 @@ public:
})};
image_descriptors[index].is_written |= desc.is_written;
image_descriptors[index].is_read |= desc.is_read;
- image_descriptors[index].is_integer |= desc.is_integer;
return index;
}
@@ -651,13 +657,13 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
}
const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite};
- const bool is_integer{IsTexturePixelFormatIntegerCached(env, cbuf)};
+ const NumericType numeric_type{GetNumericType(ReadTexturePixelFormatCached(env, cbuf))};
if (flags.type == TextureType::Buffer) {
index = descriptors.Add(ImageBufferDescriptor{
.format = flags.image_format,
.is_written = is_written,
.is_read = is_read,
- .is_integer = is_integer,
+ .numeric_type = numeric_type,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
.count = cbuf.count,
@@ -669,7 +675,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
.format = flags.image_format,
.is_written = is_written,
.is_read = is_read,
- .is_integer = is_integer,
+ .numeric_type = numeric_type,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
.count = cbuf.count,
@@ -681,6 +687,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
default:
if (flags.type == TextureType::Buffer) {
index = descriptors.Add(TextureBufferDescriptor{
+ .numeric_type = GetNumericType(ReadTexturePixelFormatCached(env, cbuf)),
.has_secondary = cbuf.has_secondary,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
@@ -696,6 +703,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
.type = flags.type,
.is_depth = flags.is_depth != 0,
.is_multisample = is_multisample,
+ .numeric_type = GetNumericType(ReadTexturePixelFormatCached(env, cbuf)),
.has_secondary = cbuf.has_secondary,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 90e46bb1ba..5a8993834c 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -18,6 +21,7 @@ struct Profile {
bool support_float_controls{};
bool support_separate_denorm_behavior{};
bool support_separate_rounding_mode{};
+ bool support_fp32_rounding_rtz{};
bool support_fp16_denorm_preserve{};
bool support_fp32_denorm_preserve{};
bool support_fp16_denorm_flush{};
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index be10a9bb08..b8888504bb 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -111,6 +111,9 @@ struct RuntimeInfo {
/// Output types for each color attachment
std::array color_output_types{};
+ /// Fragment color outputs that are active for the current pipeline.
+ std::array active_color_outputs{true, true, true, true, true, true, true, true};
+
/// Dual source blending
bool dual_source_blend{};
};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index dfacc06802..87dd14fa46 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -38,6 +38,12 @@ enum class TextureType : u32 {
};
constexpr u32 NUM_TEXTURE_TYPES = 9;
+enum class NumericType : u8 {
+ Float,
+ SignedInt,
+ UnsignedInt,
+};
+
enum class TexturePixelFormat {
A8B8G8R8_UNORM,
A8B8G8R8_SNORM,
@@ -177,6 +183,7 @@ struct StorageBufferDescriptor {
};
struct TextureBufferDescriptor {
+ NumericType numeric_type;
bool has_secondary;
u32 cbuf_index;
u32 cbuf_offset;
@@ -195,7 +202,7 @@ struct ImageBufferDescriptor {
ImageFormat format;
bool is_written;
bool is_read;
- bool is_integer;
+ NumericType numeric_type;
u32 cbuf_index;
u32 cbuf_offset;
u32 count;
@@ -209,6 +216,7 @@ struct TextureDescriptor {
TextureType type;
bool is_depth;
bool is_multisample;
+ NumericType numeric_type;
bool has_secondary;
u32 cbuf_index;
u32 cbuf_offset;
@@ -228,7 +236,7 @@ struct ImageDescriptor {
ImageFormat format;
bool is_written;
bool is_read;
- bool is_integer;
+ NumericType numeric_type;
u32 cbuf_index;
u32 cbuf_offset;
u32 count;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 0596329392..08524bd854 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -14,9 +14,12 @@
#include
#include
#include
-#include
#include
+#include
+#include
+#include
+
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/literals.h"
@@ -94,10 +97,10 @@ static constexpr Binding NULL_BINDING{
template
struct HostBindings {
- boost::container::small_vector buffers;
- boost::container::small_vector offsets;
- boost::container::small_vector sizes;
- boost::container::small_vector strides;
+ boost::container::static_vector buffers;
+ boost::container::static_vector offsets;
+ boost::container::static_vector sizes;
+ boost::container::static_vector strides;
u32 min_index{NUM_VERTEX_BUFFERS};
u32 max_index{0};
};
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index 2539997d53..d07c7e2a83 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -19,12 +22,12 @@ ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
void ChannelState::Init(Core::System& system, GPU& gpu, u64 program_id_) {
ASSERT(memory_manager);
program_id = program_id_;
- dma_pusher = std::make_unique(system, gpu, *memory_manager, *this);
- maxwell_3d = std::make_unique(system, *memory_manager);
- fermi_2d = std::make_unique(*memory_manager);
- kepler_compute = std::make_unique(system, *memory_manager);
- maxwell_dma = std::make_unique(system, *memory_manager);
- kepler_memory = std::make_unique(system, *memory_manager);
+ dma_pusher.emplace(system, gpu, *memory_manager, *this);
+ maxwell_3d.emplace(system, *memory_manager);
+ fermi_2d.emplace(*memory_manager);
+ kepler_compute.emplace(system, *memory_manager);
+ maxwell_dma.emplace(system, *memory_manager);
+ kepler_memory.emplace(system, *memory_manager);
initialized = true;
}
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
index b385f4939f..2984d2e09e 100644
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -6,6 +9,12 @@
#include
#include "common/common_types.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/dma_pusher.h"
namespace Core {
class System;
@@ -18,49 +27,34 @@ class RasterizerInterface;
namespace Tegra {
class GPU;
-
-namespace Engines {
-class Puller;
-class Fermi2D;
-class Maxwell3D;
-class MaxwellDMA;
-class KeplerCompute;
-class KeplerMemory;
-} // namespace Engines
-
class MemoryManager;
-class DmaPusher;
namespace Control {
struct ChannelState {
explicit ChannelState(s32 bind_id);
- ChannelState(const ChannelState& state) = delete;
- ChannelState& operator=(const ChannelState&) = delete;
- ChannelState(ChannelState&& other) noexcept = default;
- ChannelState& operator=(ChannelState&& other) noexcept = default;
void Init(Core::System& system, GPU& gpu, u64 program_id);
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
- s32 bind_id = -1;
- u64 program_id = 0;
/// 3D engine
- std::unique_ptr maxwell_3d;
+ std::optional maxwell_3d;
/// 2D engine
- std::unique_ptr fermi_2d;
+ std::optional fermi_2d;
/// Compute engine
- std::unique_ptr kepler_compute;
+ std::optional kepler_compute;
/// DMA engine
- std::unique_ptr maxwell_dma;
+ std::optional maxwell_dma;
/// Inline memory engine
- std::unique_ptr kepler_memory;
-
+ std::optional kepler_memory;
+ /// NV01 Timer
+ std::optional nv01_timer;
+ std::optional dma_pusher;
std::shared_ptr memory_manager;
- std::unique_ptr dma_pusher;
-
+ s32 bind_id = -1;
+ u64 program_id = 0;
bool initialized{};
};
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
index e271ecab59..bf3bd66aca 100644
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@@ -15,6 +15,7 @@
namespace Tegra::Engines {
enum class EngineTypes : u32 {
+ Nv01Timer,
KeplerCompute,
Maxwell3D,
Fermi2D,
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7dbb8f6617..e48f294a5a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
@@ -26,8 +26,15 @@ namespace Tegra::Engines {
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
- : draw_manager{std::make_unique(this)}, system{system_},
- memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
+ : draw_manager{std::make_unique(this)}, system{system_}
+ , memory_manager{memory_manager_}
+#ifdef ARCHITECTURE_x86_64
+ , macro_engine(bool(Settings::values.disable_macro_jit))
+#else
+ , macro_engine(true)
+#endif
+ , upload_state{memory_manager, regs.upload}
+{
dirty.flags.flip();
InitializeRegisterDefaults();
execution_mask.reset();
@@ -328,9 +335,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
shadow_state.shadow_ram_control = static_cast(nonshadow_argument);
return;
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
- return macro_engine->ClearCode(regs.load_mme.instruction_ptr);
+ return macro_engine.ClearCode(regs.load_mme.instruction_ptr);
case MAXWELL3D_REG_INDEX(load_mme.instruction):
- return macro_engine->AddCode(regs.load_mme.instruction_ptr, argument);
+ return macro_engine.AddCode(regs.load_mme.instruction_ptr, argument);
case MAXWELL3D_REG_INDEX(load_mme.start_address):
return ProcessMacroBind(argument);
case MAXWELL3D_REG_INDEX(falcon[4]):
@@ -398,7 +405,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector& parameters)
((method - MacroRegistersStart) >> 1) % static_cast(macro_positions.size());
// Execute the current macro.
- macro_engine->Execute(macro_positions[entry], parameters);
+ macro_engine.Execute(*this, macro_positions[entry], parameters);
draw_manager->DrawDeferred();
}
@@ -464,7 +471,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
}
void Maxwell3D::ProcessMacroUpload(u32 data) {
- macro_engine->AddCode(regs.load_mme.instruction_ptr++, data);
+ macro_engine.AddCode(regs.load_mme.instruction_ptr++, data);
}
void Maxwell3D::ProcessMacroBind(u32 data) {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 5312c04b6f..b73082b7ef 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -2258,7 +2258,7 @@ public:
/// Returns whether the vertex array specified by index is supposed to be
/// accessed per instance or not.
bool IsInstancingEnabled(std::size_t index) const {
- return is_instanced[index];
+ return bool(is_instanced[index]); //FUCK YOU MSVC
}
};
@@ -3203,7 +3203,7 @@ private:
std::vector macro_params;
/// Interpreter for the macro codes uploaded to the GPU.
- std::optional macro_engine;
+ MacroEngine macro_engine;
Upload::State upload_state;
diff --git a/src/video_core/engines/nv01_timer.h b/src/video_core/engines/nv01_timer.h
new file mode 100644
index 0000000000..a8e60f9f53
--- /dev/null
+++ b/src/video_core/engines/nv01_timer.h
@@ -0,0 +1,52 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+#include
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/engine_interface.h"
+#include "video_core/engines/engine_upload.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines {
+class Nv01Timer final : public EngineInterface {
+public:
+ explicit Nv01Timer(Core::System& system_, MemoryManager& memory_manager)
+ : system{system_}
+ {}
+ ~Nv01Timer() override;
+
+ /// Write the value to the register identified by method.
+ void CallMethod(u32 method, u32 method_argument, bool is_last_call) override {
+ LOG_DEBUG(HW_GPU, "method={}, argument={}, is_last_call={}", method, method_argument, is_last_call);
+ }
+
+ /// Write multiple values to the register identified by method.
+ void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override {
+ LOG_DEBUG(HW_GPU, "method={}, base_start={}, amount={}, pending={}", method, fmt::ptr(base_start), amount, methods_pending);
+ }
+
+ struct Regs {
+ // No fucking idea
+ INSERT_PADDING_BYTES_NOINIT(0x48);
+ } regs{};
+private:
+ void ConsumeSinkImpl() override {}
+ Core::System& system;
+};
+}
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 8dd34c04ab..b5b4e5d7fa 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -34,24 +37,22 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
bound_engines[method_call.subchannel] = engine_id;
switch (engine_id) {
case EngineID::FERMI_TWOD_A:
- dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel,
- EngineTypes::Fermi2D);
+ dma_pusher.BindSubchannel(&*channel_state.fermi_2d, method_call.subchannel, EngineTypes::Fermi2D);
break;
case EngineID::MAXWELL_B:
- dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel,
- EngineTypes::Maxwell3D);
+ dma_pusher.BindSubchannel(&*channel_state.maxwell_3d, method_call.subchannel, EngineTypes::Maxwell3D);
break;
case EngineID::KEPLER_COMPUTE_B:
- dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel,
- EngineTypes::KeplerCompute);
+ dma_pusher.BindSubchannel(&*channel_state.kepler_compute, method_call.subchannel, EngineTypes::KeplerCompute);
break;
case EngineID::MAXWELL_DMA_COPY_A:
- dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel,
- EngineTypes::MaxwellDMA);
+ dma_pusher.BindSubchannel(&*channel_state.maxwell_dma, method_call.subchannel, EngineTypes::MaxwellDMA);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel,
- EngineTypes::KeplerMemory);
+ dma_pusher.BindSubchannel(&*channel_state.kepler_memory, method_call.subchannel, EngineTypes::KeplerMemory);
+ break;
+ case EngineID::NV01_TIMER:
+ dma_pusher.BindSubchannel(&*channel_state.nv01_timer, method_call.subchannel, EngineTypes::Nv01Timer);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
@@ -209,24 +210,22 @@ void Puller::CallEngineMethod(const MethodCall& method_call) {
switch (engine) {
case EngineID::FERMI_TWOD_A:
- channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
+ channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
break;
case EngineID::MAXWELL_B:
- channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
+ channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
break;
case EngineID::KEPLER_COMPUTE_B:
- channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
+ channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
break;
case EngineID::MAXWELL_DMA_COPY_A:
- channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
+ channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
+ channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+ break;
+ case EngineID::NV01_TIMER:
+ channel_state.nv01_timer->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
@@ -255,6 +254,9 @@ void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_s
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
break;
+ case EngineID::NV01_TIMER:
+ channel_state.nv01_timer->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
break;
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
index d4175ee945..fe5102e3ed 100644
--- a/src/video_core/engines/puller.h
+++ b/src/video_core/engines/puller.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -20,6 +23,7 @@ class MemoryManager;
class DmaPusher;
enum class EngineID {
+ NV01_TIMER = 0x0004,
FERMI_TWOD_A = 0x902D, // 2D Engine
MAXWELL_B = 0xB197, // 3D Engine
KEPLER_COMPUTE_B = 0xB1C0,
diff --git a/src/video_core/macro.cpp b/src/video_core/macro.cpp
index 3fe69be4dd..0d1fe0a52b 100644
--- a/src/video_core/macro.cpp
+++ b/src/video_core/macro.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@@ -10,6 +10,7 @@
#include
#include
+#include
#ifdef ARCHITECTURE_x86_64
// xbyak hates human beings
#ifdef __GNUC__
@@ -73,601 +74,411 @@ bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) {
}
}
-class HLEMacroImpl : public CachedMacro {
-public:
- explicit HLEMacroImpl(Maxwell3D& maxwell3d_)
- : CachedMacro(maxwell3d_)
- {}
-};
+} // Anonymous namespace
-/// @note: these macros have two versions, a normal and extended version, with the extended version
-/// also assigning the base vertex/instance.
-template
-class HLE_DrawArraysIndirect final : public HLEMacroImpl {
-public:
- explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_)
- : HLEMacroImpl(maxwell3d_)
- {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- auto topology = static_cast(parameters[0]);
- if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
- Fallback(parameters);
- return;
- }
-
- auto& params = maxwell3d.draw_manager->GetIndirectParams();
- params.is_byte_count = false;
- params.is_indexed = false;
- params.include_count = false;
- params.count_start_address = 0;
- params.indirect_start_address = maxwell3d.GetMacroAddress(1);
- params.buffer_size = 4 * sizeof(u32);
- params.max_draw_counts = 1;
- params.stride = 0;
-
- if (extended) {
- maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
- maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
- }
-
- maxwell3d.draw_manager->DrawArrayIndirect(topology);
-
- if (extended) {
- maxwell3d.engine_state = Maxwell3D::EngineHint::None;
- maxwell3d.replace_table.clear();
- }
+void HLE_DrawArraysIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ auto topology = static_cast(parameters[0]);
+ if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
+ Fallback(maxwell3d, parameters);
+ return;
}
-private:
- void Fallback(const std::vector& parameters) {
- SCOPE_EXIT {
- if (extended) {
- maxwell3d.engine_state = Maxwell3D::EngineHint::None;
- maxwell3d.replace_table.clear();
- }
- };
- maxwell3d.RefreshParameters();
- const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+ auto& params = maxwell3d.draw_manager->GetIndirectParams();
+ params.is_byte_count = false;
+ params.is_indexed = false;
+ params.include_count = false;
+ params.count_start_address = 0;
+ params.indirect_start_address = maxwell3d.GetMacroAddress(1);
+ params.buffer_size = 4 * sizeof(u32);
+ params.max_draw_counts = 1;
+ params.stride = 0;
- auto topology = static_cast(parameters[0]);
- const u32 vertex_first = parameters[3];
- const u32 vertex_count = parameters[1];
-
- if (!IsTopologySafe(topology) && size_t(maxwell3d.GetMaxCurrentVertices()) < size_t(vertex_first) + size_t(vertex_count)) {
- ASSERT(false && "Faulty draw!");
- return;
- }
-
- const u32 base_instance = parameters[4];
- if (extended) {
- maxwell3d.regs.global_base_instance_index = base_instance;
- maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
- maxwell3d.SetHLEReplacementAttributeType(
- 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
- }
-
- maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance,
- instance_count);
-
- if (extended) {
- maxwell3d.regs.global_base_instance_index = 0;
- maxwell3d.engine_state = Maxwell3D::EngineHint::None;
- maxwell3d.replace_table.clear();
- }
- }
-};
-
-/*
- * @note: these macros have two versions, a normal and extended version, with the extended version
- * also assigning the base vertex/instance.
- */
-template
-class HLE_DrawIndexedIndirect final : public HLEMacroImpl {
-public:
- explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- auto topology = static_cast(parameters[0]);
- if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
- Fallback(parameters);
- return;
- }
-
- const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize());
- const u32 element_base = parameters[4];
- const u32 base_instance = parameters[5];
- maxwell3d.regs.vertex_id_base = element_base;
- maxwell3d.regs.global_base_vertex_index = element_base;
- maxwell3d.regs.global_base_instance_index = base_instance;
- maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- if (extended) {
- maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
- maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
- maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
- }
- auto& params = maxwell3d.draw_manager->GetIndirectParams();
- params.is_byte_count = false;
- params.is_indexed = true;
- params.include_count = false;
- params.count_start_address = 0;
- params.indirect_start_address = maxwell3d.GetMacroAddress(1);
- params.buffer_size = 5 * sizeof(u32);
- params.max_draw_counts = 1;
- params.stride = 0;
- maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
- maxwell3d.regs.vertex_id_base = 0x0;
- maxwell3d.regs.global_base_vertex_index = 0x0;
- maxwell3d.regs.global_base_instance_index = 0x0;
- if (extended) {
- maxwell3d.engine_state = Maxwell3D::EngineHint::None;
- maxwell3d.replace_table.clear();
- }
- }
-
-private:
- void Fallback(const std::vector& parameters) {
- maxwell3d.RefreshParameters();
- const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
- const u32 element_base = parameters[4];
- const u32 base_instance = parameters[5];
- maxwell3d.regs.vertex_id_base = element_base;
- maxwell3d.regs.global_base_vertex_index = element_base;
- maxwell3d.regs.global_base_instance_index = base_instance;
- maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- if (extended) {
- maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
- maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
- maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
- }
-
- maxwell3d.draw_manager->DrawIndex(Tegra::Maxwell3D::Regs::PrimitiveTopology(parameters[0]), parameters[3], parameters[1], element_base, base_instance, instance_count);
-
- maxwell3d.regs.vertex_id_base = 0x0;
- maxwell3d.regs.global_base_vertex_index = 0x0;
- maxwell3d.regs.global_base_instance_index = 0x0;
- if (extended) {
- maxwell3d.engine_state = Maxwell3D::EngineHint::None;
- maxwell3d.replace_table.clear();
- }
- }
-};
-
-class HLE_MultiLayerClear final : public HLEMacroImpl {
-public:
- explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
- ASSERT(parameters.size() == 1);
-
- const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
- const u32 rt_index = clear_params.RT;
- const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
- ASSERT(clear_params.layer == 0);
-
- maxwell3d.regs.clear_surface.raw = clear_params.raw;
- maxwell3d.draw_manager->Clear(num_layers);
- }
-};
-
-class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl {
-public:
- explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]);
- if (!IsTopologySafe(topology)) {
- Fallback(parameters);
- return;
- }
-
- const u32 start_indirect = parameters[0];
- const u32 end_indirect = parameters[1];
- if (start_indirect >= end_indirect) {
- // Nothing to do.
- return;
- }
-
- const u32 padding = parameters[3]; // padding is in words
-
- // size of each indirect segment
- const u32 indirect_words = 5 + padding;
- const u32 stride = indirect_words * sizeof(u32);
- const std::size_t draw_count = end_indirect - start_indirect;
- const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize());
- maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- auto& params = maxwell3d.draw_manager->GetIndirectParams();
- params.is_byte_count = false;
- params.is_indexed = true;
- params.include_count = true;
- params.count_start_address = maxwell3d.GetMacroAddress(4);
- params.indirect_start_address = maxwell3d.GetMacroAddress(5);
- params.buffer_size = stride * draw_count;
- params.max_draw_counts = draw_count;
- params.stride = stride;
- maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ if (extended) {
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
- maxwell3d.SetHLEReplacementAttributeType(
- 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
- maxwell3d.SetHLEReplacementAttributeType(
- 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
- maxwell3d.SetHLEReplacementAttributeType(0, 0x648,
- Maxwell3D::HLEReplacementAttributeType::DrawID);
- maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+ }
+
+ maxwell3d.draw_manager->DrawArrayIndirect(topology);
+
+ if (extended) {
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
maxwell3d.replace_table.clear();
}
-
-private:
- void Fallback(const std::vector& parameters) {
- SCOPE_EXIT {
- // Clean everything.
- maxwell3d.regs.vertex_id_base = 0x0;
+}
+void HLE_DrawArraysIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters) {
+ SCOPE_EXIT {
+ if (extended) {
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
maxwell3d.replace_table.clear();
- };
- maxwell3d.RefreshParameters();
- const u32 start_indirect = parameters[0];
- const u32 end_indirect = parameters[1];
- if (start_indirect >= end_indirect) {
- // Nothing to do.
- return;
- }
- const auto topology = static_cast(parameters[2]);
- const u32 padding = parameters[3];
- const std::size_t max_draws = parameters[4];
-
- const u32 indirect_words = 5 + padding;
- const std::size_t first_draw = start_indirect;
- const std::size_t effective_draws = end_indirect - start_indirect;
- const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws);
-
- for (std::size_t index = first_draw; index < last_draw; index++) {
- const std::size_t base = index * indirect_words + 5;
- const u32 base_vertex = parameters[base + 3];
- const u32 base_instance = parameters[base + 4];
- maxwell3d.regs.vertex_id_base = base_vertex;
- maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
- maxwell3d.SetHLEReplacementAttributeType(
- 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
- maxwell3d.SetHLEReplacementAttributeType(
- 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
- maxwell3d.CallMethod(0x8e3, 0x648, true);
- maxwell3d.CallMethod(0x8e4, static_cast(index), true);
- maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
- base_vertex, base_instance, parameters[base + 1]);
}
+ };
+ maxwell3d.RefreshParameters();
+ const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+ auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0]);
+ const u32 vertex_first = parameters[3];
+ const u32 vertex_count = parameters[1];
+ if (!IsTopologySafe(topology) && size_t(maxwell3d.GetMaxCurrentVertices()) < size_t(vertex_first) + size_t(vertex_count)) {
+ ASSERT(false && "Faulty draw!");
+ return;
}
-};
-
-class HLE_DrawIndirectByteCount final : public HLEMacroImpl {
-public:
- explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback();
-
- auto topology = static_cast(parameters[0] & 0xFFFFU);
- if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) {
- Fallback(parameters);
- return;
- }
- auto& params = maxwell3d.draw_manager->GetIndirectParams();
- params.is_byte_count = true;
- params.is_indexed = false;
- params.include_count = false;
- params.count_start_address = 0;
- params.indirect_start_address = maxwell3d.GetMacroAddress(2);
- params.buffer_size = 4;
- params.max_draw_counts = 1;
- params.stride = parameters[1];
- maxwell3d.regs.draw.begin = parameters[0];
- maxwell3d.regs.draw_auto_stride = parameters[1];
- maxwell3d.regs.draw_auto_byte_count = parameters[2];
-
- maxwell3d.draw_manager->DrawArrayIndirect(topology);
+ const u32 base_instance = parameters[4];
+ if (extended) {
+ maxwell3d.regs.global_base_instance_index = base_instance;
+ maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
}
-
-private:
- void Fallback(const std::vector& parameters) {
- maxwell3d.RefreshParameters();
-
- maxwell3d.regs.draw.begin = parameters[0];
- maxwell3d.regs.draw_auto_stride = parameters[1];
- maxwell3d.regs.draw_auto_byte_count = parameters[2];
-
- maxwell3d.draw_manager->DrawArray(
- maxwell3d.regs.draw.topology, 0,
- maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1);
- }
-};
-
-class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl {
-public:
- explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
- const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2;
- const u32 address = maxwell3d.regs.shadow_scratch[24];
- auto& const_buffer = maxwell3d.regs.const_buffer;
- const_buffer.size = 0x7000;
- const_buffer.address_high = (address >> 24) & 0xFF;
- const_buffer.address_low = address << 8;
- const_buffer.offset = offset;
- }
-};
-
-class HLE_D7333D26E0A93EDE final : public HLEMacroImpl {
-public:
- explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
- const size_t index = parameters[0];
- const u32 address = maxwell3d.regs.shadow_scratch[42 + index];
- const u32 size = maxwell3d.regs.shadow_scratch[47 + index];
- auto& const_buffer = maxwell3d.regs.const_buffer;
- const_buffer.size = size;
- const_buffer.address_high = (address >> 24) & 0xFF;
- const_buffer.address_low = address << 8;
- }
-};
-
-class HLE_BindShader final : public HLEMacroImpl {
-public:
- explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
- auto& regs = maxwell3d.regs;
- const u32 index = parameters[0];
- if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) {
- return;
- }
-
- regs.pipelines[index & 0xF].offset = parameters[2];
- maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true;
- regs.shadow_scratch[28 + index] = parameters[1];
- regs.shadow_scratch[34 + index] = parameters[2];
-
- const u32 address = parameters[4];
- auto& const_buffer = regs.const_buffer;
- const_buffer.size = 0x10000;
- const_buffer.address_high = (address >> 24) & 0xFF;
- const_buffer.address_low = address << 8;
-
- const size_t bind_group_id = parameters[3] & 0x7F;
- auto& bind_group = regs.bind_groups[bind_group_id];
- bind_group.raw_config = 0x11;
- maxwell3d.ProcessCBBind(bind_group_id);
- }
-};
-
-class HLE_SetRasterBoundingBox final : public HLEMacroImpl {
-public:
- explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
- const u32 raster_mode = parameters[0];
- auto& regs = maxwell3d.regs;
- const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable;
- const u32 scratch_data = maxwell3d.regs.shadow_scratch[52];
- regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F;
- regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled);
- }
-};
-
-template
-class HLE_ClearConstBuffer final : public HLEMacroImpl {
-public:
- explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
- static constexpr std::array zeroes{};
- auto& regs = maxwell3d.regs;
- regs.const_buffer.size = u32(base_size);
- regs.const_buffer.address_high = parameters[0];
- regs.const_buffer.address_low = parameters[1];
- regs.const_buffer.offset = 0;
- maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4);
- }
-};
-
-class HLE_ClearMemory final : public HLEMacroImpl {
-public:
- explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
-
- const u32 needed_memory = parameters[2] / sizeof(u32);
- if (needed_memory > zero_memory.size()) {
- zero_memory.resize(needed_memory, 0);
- }
- auto& regs = maxwell3d.regs;
- regs.upload.line_length_in = parameters[2];
- regs.upload.line_count = 1;
- regs.upload.dest.address_high = parameters[0];
- regs.upload.dest.address_low = parameters[1];
- maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
- maxwell3d.CallMultiMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory);
- }
-
-private:
- std::vector zero_memory;
-};
-
-class HLE_TransformFeedbackSetup final : public HLEMacroImpl {
-public:
- explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
-
- void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override {
- maxwell3d.RefreshParameters();
-
- auto& regs = maxwell3d.regs;
- regs.transform_feedback_enabled = 1;
- regs.transform_feedback.buffers[0].start_offset = 0;
- regs.transform_feedback.buffers[1].start_offset = 0;
- regs.transform_feedback.buffers[2].start_offset = 0;
- regs.transform_feedback.buffers[3].start_offset = 0;
-
- regs.upload.line_length_in = 4;
- regs.upload.line_count = 1;
- regs.upload.dest.address_high = parameters[0];
- regs.upload.dest.address_low = parameters[1];
- maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
- maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true);
-
- maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address());
- }
-};
-
-} // Anonymous namespace
-
-HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
-
-HLEMacro::~HLEMacro() = default;
-
-std::unique_ptr HLEMacro::GetHLEProgram(u64 hash) const {
- // Compiler will make you a GREAT job at making an ad-hoc hash table :)
- switch (hash) {
- case 0x0D61FC9FAAC9FCADULL: return std::make_unique>(maxwell3d);
- case 0x8A4D173EB99A8603ULL: return std::make_unique>(maxwell3d);
- case 0x771BB18C62444DA0ULL: return std::make_unique>(maxwell3d);
- case 0x0217920100488FF7ULL: return std::make_unique>(maxwell3d);
- case 0x3F5E74B9C9A50164ULL: return std::make_unique(maxwell3d);
- case 0xEAD26C3E2109B06BULL: return std::make_unique(maxwell3d);
- case 0xC713C83D8F63CCF3ULL: return std::make_unique(maxwell3d);
- case 0xD7333D26E0A93EDEULL: return std::make_unique(maxwell3d);
- case 0xEB29B2A09AA06D38ULL: return std::make_unique(maxwell3d);
- case 0xDB1341DBEB4C8AF7ULL: return std::make_unique(maxwell3d);
- case 0x6C97861D891EDf7EULL: return std::make_unique>(maxwell3d);
- case 0xD246FDDF3A6173D7ULL: return std::make_unique>(maxwell3d);
- case 0xEE4D0004BEC8ECF4ULL: return std::make_unique(maxwell3d);
- case 0xFC0CF27F5FFAA661ULL: return std::make_unique(maxwell3d);
- case 0xB5F74EDB717278ECULL: return std::make_unique(maxwell3d);
- default:
- return nullptr;
+ maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, instance_count);
+ if (extended) {
+ maxwell3d.regs.global_base_instance_index = 0;
+ maxwell3d.engine_state = Maxwell3D::EngineHint::None;
+ maxwell3d.replace_table.clear();
}
}
-namespace {
-class MacroInterpreterImpl final : public CachedMacro {
-public:
- explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_)
- : CachedMacro(maxwell3d_)
- , code{code_}
- {}
+void HLE_DrawIndexedIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ auto topology = static_cast(parameters[0]);
+ if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
+ Fallback(maxwell3d, parameters);
+ return;
+ }
- void Execute(const std::vector& params, u32 method) override;
+ const u32 estimate = u32(maxwell3d.EstimateIndexBufferSize());
+ const u32 element_base = parameters[4];
+ const u32 base_instance = parameters[5];
+ maxwell3d.regs.vertex_id_base = element_base;
+ maxwell3d.regs.global_base_vertex_index = element_base;
+ maxwell3d.regs.global_base_instance_index = base_instance;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ if (extended) {
+ maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+ }
+ auto& params = maxwell3d.draw_manager->GetIndirectParams();
+ params.is_byte_count = false;
+ params.is_indexed = true;
+ params.include_count = false;
+ params.count_start_address = 0;
+ params.indirect_start_address = maxwell3d.GetMacroAddress(1);
+ params.buffer_size = 5 * sizeof(u32);
+ params.max_draw_counts = 1;
+ params.stride = 0;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
+ maxwell3d.regs.vertex_id_base = 0x0;
+ maxwell3d.regs.global_base_vertex_index = 0x0;
+ maxwell3d.regs.global_base_instance_index = 0x0;
+ if (extended) {
+ maxwell3d.engine_state = Maxwell3D::EngineHint::None;
+ maxwell3d.replace_table.clear();
+ }
+}
+void HLE_DrawIndexedIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters) {
+ maxwell3d.RefreshParameters();
+ const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+ const u32 element_base = parameters[4];
+ const u32 base_instance = parameters[5];
+ maxwell3d.regs.vertex_id_base = element_base;
+ maxwell3d.regs.global_base_vertex_index = element_base;
+ maxwell3d.regs.global_base_instance_index = base_instance;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ if (extended) {
+ maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+ }
+ maxwell3d.draw_manager->DrawIndex(Tegra::Maxwell3D::Regs::PrimitiveTopology(parameters[0]), parameters[3], parameters[1], element_base, base_instance, instance_count);
+ maxwell3d.regs.vertex_id_base = 0x0;
+ maxwell3d.regs.global_base_vertex_index = 0x0;
+ maxwell3d.regs.global_base_instance_index = 0x0;
+ if (extended) {
+ maxwell3d.engine_state = Maxwell3D::EngineHint::None;
+ maxwell3d.replace_table.clear();
+ }
+}
+void HLE_MultiLayerClear::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ maxwell3d.RefreshParameters();
+ ASSERT(parameters.size() == 1);
-private:
- /// Resets the execution engine state, zeroing registers, etc.
- void Reset();
+ const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
+ const u32 rt_index = clear_params.RT;
+ const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
+ ASSERT(clear_params.layer == 0);
- /**
- * Executes a single macro instruction located at the current program counter. Returns whether
- * the interpreter should keep running.
- *
- * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
- * previous instruction.
- */
- bool Step(bool is_delay_slot);
+ maxwell3d.regs.clear_surface.raw = clear_params.raw;
+ maxwell3d.draw_manager->Clear(num_layers);
+}
+void HLE_MultiDrawIndexedIndirectCount::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]);
+ if (!IsTopologySafe(topology)) {
+ Fallback(maxwell3d, parameters);
+ return;
+ }
- /// Calculates the result of an ALU operation. src_a OP src_b;
- u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
+ const u32 start_indirect = parameters[0];
+ const u32 end_indirect = parameters[1];
+ if (start_indirect >= end_indirect) {
+ // Nothing to do.
+ return;
+ }
- /// Performs the result operation on the input result and stores it in the specified register
- /// (if necessary).
- void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
+ const u32 padding = parameters[3]; // padding is in words
- /// Evaluates the branch condition and returns whether the branch should be taken or not.
- bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
+ // size of each indirect segment
+ const u32 indirect_words = 5 + padding;
+ const u32 stride = indirect_words * sizeof(u32);
+ const std::size_t draw_count = end_indirect - start_indirect;
+ const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize());
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ auto& params = maxwell3d.draw_manager->GetIndirectParams();
+ params.is_byte_count = false;
+ params.is_indexed = true;
+ params.include_count = true;
+ params.count_start_address = maxwell3d.GetMacroAddress(4);
+ params.indirect_start_address = maxwell3d.GetMacroAddress(5);
+ params.buffer_size = stride * draw_count;
+ params.max_draw_counts = draw_count;
+ params.stride = stride;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x648, Maxwell3D::HLEReplacementAttributeType::DrawID);
+ maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
+ maxwell3d.engine_state = Maxwell3D::EngineHint::None;
+ maxwell3d.replace_table.clear();
+}
+void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters) {
+ SCOPE_EXIT {
+ // Clean everything.
+ maxwell3d.regs.vertex_id_base = 0x0;
+ maxwell3d.engine_state = Maxwell3D::EngineHint::None;
+ maxwell3d.replace_table.clear();
+ };
+ maxwell3d.RefreshParameters();
+ const u32 start_indirect = parameters[0];
+ const u32 end_indirect = parameters[1];
+ if (start_indirect >= end_indirect) {
+ // Nothing to do.
+ return;
+ }
+ const auto topology = static_cast(parameters[2]);
+ const u32 padding = parameters[3];
+ const std::size_t max_draws = parameters[4];
+ const u32 indirect_words = 5 + padding;
+ const std::size_t first_draw = start_indirect;
+ const std::size_t effective_draws = end_indirect - start_indirect;
+ const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws);
+ for (std::size_t index = first_draw; index < last_draw; index++) {
+ const std::size_t base = index * indirect_words + 5;
+ const u32 base_vertex = parameters[base + 3];
+ const u32 base_instance = parameters[base + 4];
+ maxwell3d.regs.vertex_id_base = base_vertex;
+ maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
+ maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+ maxwell3d.CallMethod(0x8e3, 0x648, true);
+ maxwell3d.CallMethod(0x8e4, static_cast(index), true);
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]);
+ }
+}
+void HLE_DrawIndirectByteCount::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback();
+ auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU);
+ if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) {
+ Fallback(maxwell3d, parameters);
+ return;
+ }
+ auto& params = maxwell3d.draw_manager->GetIndirectParams();
+ params.is_byte_count = true;
+ params.is_indexed = false;
+ params.include_count = false;
+ params.count_start_address = 0;
+ params.indirect_start_address = maxwell3d.GetMacroAddress(2);
+ params.buffer_size = 4;
+ params.max_draw_counts = 1;
+ params.stride = parameters[1];
+ maxwell3d.regs.draw.begin = parameters[0];
+ maxwell3d.regs.draw_auto_stride = parameters[1];
+ maxwell3d.regs.draw_auto_byte_count = parameters[2];
+ maxwell3d.draw_manager->DrawArrayIndirect(topology);
+}
+void HLE_DrawIndirectByteCount::Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters) {
+ maxwell3d.RefreshParameters();
- /// Reads an opcode at the current program counter location.
- Macro::Opcode GetOpcode() const;
+ maxwell3d.regs.draw.begin = parameters[0];
+ maxwell3d.regs.draw_auto_stride = parameters[1];
+ maxwell3d.regs.draw_auto_byte_count = parameters[2];
- /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
- u32 GetRegister(u32 register_id) const;
+ maxwell3d.draw_manager->DrawArray(
+ maxwell3d.regs.draw.topology, 0,
+ maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1);
+}
+void HLE_C713C83D8F63CCF3::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ maxwell3d.RefreshParameters();
+ const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2;
+ const u32 address = maxwell3d.regs.shadow_scratch[24];
+ auto& const_buffer = maxwell3d.regs.const_buffer;
+ const_buffer.size = 0x7000;
+ const_buffer.address_high = (address >> 24) & 0xFF;
+ const_buffer.address_low = address << 8;
+ const_buffer.offset = offset;
+}
+void HLE_D7333D26E0A93EDE::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ maxwell3d.RefreshParameters();
+ const size_t index = parameters[0];
+ const u32 address = maxwell3d.regs.shadow_scratch[42 + index];
+ const u32 size = maxwell3d.regs.shadow_scratch[47 + index];
+ auto& const_buffer = maxwell3d.regs.const_buffer;
+ const_buffer.size = size;
+ const_buffer.address_high = (address >> 24) & 0xFF;
+ const_buffer.address_low = address << 8;
+}
+void HLE_BindShader::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ maxwell3d.RefreshParameters();
+ auto& regs = maxwell3d.regs;
+ const u32 index = parameters[0];
+ if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) {
+ return;
+ }
- /// Sets the register to the input value.
- void SetRegister(u32 register_id, u32 value);
+ regs.pipelines[index & 0xF].offset = parameters[2];
+ maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true;
+ regs.shadow_scratch[28 + index] = parameters[1];
+ regs.shadow_scratch[34 + index] = parameters[2];
- /// Sets the method address to use for the next Send instruction.
- void SetMethodAddress(u32 address);
+ const u32 address = parameters[4];
+ auto& const_buffer = regs.const_buffer;
+ const_buffer.size = 0x10000;
+ const_buffer.address_high = (address >> 24) & 0xFF;
+ const_buffer.address_low = address << 8;
- /// Calls a GPU Engine method with the input parameter.
- void Send(u32 value);
+ const size_t bind_group_id = parameters[3] & 0x7F;
+ auto& bind_group = regs.bind_groups[bind_group_id];
+ bind_group.raw_config = 0x11;
+ maxwell3d.ProcessCBBind(bind_group_id);
+}
+void HLE_SetRasterBoundingBox::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ maxwell3d.RefreshParameters();
+ const u32 raster_mode = parameters[0];
+ auto& regs = maxwell3d.regs;
+ const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable;
+ const u32 scratch_data = maxwell3d.regs.shadow_scratch[52];
+ regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F;
+ regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled);
+}
+void HLE_ClearConstBuffer::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ static constexpr std::array zeroes{}; //must be bigger than either 7000 or 5F00
+ maxwell3d.RefreshParameters();
+ auto& regs = maxwell3d.regs;
+ regs.const_buffer.size = u32(base_size);
+ regs.const_buffer.address_high = parameters[0];
+ regs.const_buffer.address_low = parameters[1];
+ regs.const_buffer.offset = 0;
+ maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4);
+}
+void HLE_ClearMemory::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ maxwell3d.RefreshParameters();
+ const u32 needed_memory = parameters[2] / sizeof(u32);
+ if (needed_memory > zero_memory.size()) {
+ zero_memory.resize(needed_memory, 0);
+ }
+ auto& regs = maxwell3d.regs;
+ regs.upload.line_length_in = parameters[2];
+ regs.upload.line_count = 1;
+ regs.upload.dest.address_high = parameters[0];
+ regs.upload.dest.address_low = parameters[1];
+ maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
+ maxwell3d.CallMultiMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory);
+}
+void HLE_TransformFeedbackSetup::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) {
+ maxwell3d.RefreshParameters();
+ auto& regs = maxwell3d.regs;
+ regs.transform_feedback_enabled = 1;
+ regs.transform_feedback.buffers[0].start_offset = 0;
+ regs.transform_feedback.buffers[1].start_offset = 0;
+ regs.transform_feedback.buffers[2].start_offset = 0;
+ regs.transform_feedback.buffers[3].start_offset = 0;
+ regs.upload.line_length_in = 4;
+ regs.upload.line_count = 1;
+ regs.upload.dest.address_high = parameters[0];
+ regs.upload.dest.address_low = parameters[1];
+ maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
+ maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true);
+ maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address());
+}
- /// Reads a GPU register located at the method address.
- u32 Read(u32 method) const;
+#define HLE_MACRO_LIST \
+ HLE_MACRO_ELEM(0x0D61FC9FAAC9FCADULL, HLE_DrawArraysIndirect, (false)) \
+ HLE_MACRO_ELEM(0x8A4D173EB99A8603ULL, HLE_DrawArraysIndirect, (true)) \
+ HLE_MACRO_ELEM(0x771BB18C62444DA0ULL, HLE_DrawIndexedIndirect, (false)) \
+ HLE_MACRO_ELEM(0x0217920100488FF7ULL, HLE_DrawIndexedIndirect, (true)) \
+ HLE_MACRO_ELEM(0x3F5E74B9C9A50164ULL, HLE_MultiDrawIndexedIndirectCount, ()) \
+ HLE_MACRO_ELEM(0xEAD26C3E2109B06BULL, HLE_MultiLayerClear, ()) \
+ HLE_MACRO_ELEM(0xC713C83D8F63CCF3ULL, HLE_C713C83D8F63CCF3, ()) \
+ HLE_MACRO_ELEM(0xD7333D26E0A93EDEULL, HLE_D7333D26E0A93EDE, ()) \
+ HLE_MACRO_ELEM(0xEB29B2A09AA06D38ULL, HLE_BindShader, ()) \
+ HLE_MACRO_ELEM(0xDB1341DBEB4C8AF7ULL, HLE_SetRasterBoundingBox, ()) \
+ HLE_MACRO_ELEM(0x6C97861D891EDf7EULL, HLE_ClearConstBuffer, (0x5F00)) \
+ HLE_MACRO_ELEM(0xD246FDDF3A6173D7ULL, HLE_ClearConstBuffer, (0x7000)) \
+ HLE_MACRO_ELEM(0xEE4D0004BEC8ECF4ULL, HLE_ClearMemory, ()) \
+ HLE_MACRO_ELEM(0xFC0CF27F5FFAA661ULL, HLE_TransformFeedbackSetup, ()) \
+ HLE_MACRO_ELEM(0xB5F74EDB717278ECULL, HLE_DrawIndirectByteCount, ()) \
- /// Returns the next parameter in the parameter queue.
- u32 FetchParameter();
+// Allocates and returns a cached macro if the hash matches a known function.
+[[nodiscard]] inline AnyCachedMacro GetHLEProgram(u64 hash) noexcept {
+ // Compiler will make you a GREAT job at making an ad-hoc hash table :)
+ switch (hash) {
+#define HLE_MACRO_ELEM(HASH, TY, VAL) case HASH: return TY VAL;
+ HLE_MACRO_LIST
+#undef HLE_MACRO_ELEM
+ default: return std::monostate{};
+ }
+}
+[[nodiscard]] inline bool CanBeHLEProgram(u64 hash) noexcept {
+ switch (hash) {
+#define HLE_MACRO_ELEM(HASH, TY, VAL) case HASH: return true;
+ HLE_MACRO_LIST
+#undef HLE_MACRO_ELEM
+ default: return false;
+ }
+}
- /// Current program counter
- u32 pc{};
- /// Program counter to execute at after the delay slot is executed.
- std::optional delayed_pc;
-
- /// General purpose macro registers.
- std::array registers = {};
-
- /// Method address to use for the next Send instruction.
- Macro::MethodAddress method_address = {};
-
- /// Input parameters of the current macro.
- std::unique_ptr parameters;
- std::size_t num_parameters = 0;
- std::size_t parameters_capacity = 0;
- /// Index of the next parameter that will be fetched by the 'parm' instruction.
- u32 next_parameter_index = 0;
-
- bool carry_flag = false;
- const std::vector& code;
-};
-
-void MacroInterpreterImpl::Execute(const std::vector& params, u32 method) {
+void MacroInterpreterImpl::Execute(Engines::Maxwell3D& maxwell3d, std::span params, u32 method) {
Reset();
registers[1] = params[0];
- num_parameters = params.size();
-
- if (num_parameters > parameters_capacity) {
- parameters_capacity = num_parameters;
- parameters = std::make_unique(num_parameters);
- }
- std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32));
+ parameters.resize(params.size());
+ std::memcpy(parameters.data(), params.data(), params.size() * sizeof(u32));
// Execute the code until we hit an exit condition.
bool keep_executing = true;
while (keep_executing) {
- keep_executing = Step(false);
+ keep_executing = Step(maxwell3d, false);
}
// Assert the the macro used all the input parameters
- ASSERT(next_parameter_index == num_parameters);
+ ASSERT(next_parameter_index == parameters.size());
}
+/// Resets the execution engine state, zeroing registers, etc.
void MacroInterpreterImpl::Reset() {
registers = {};
pc = 0;
delayed_pc = {};
method_address.raw = 0;
- num_parameters = 0;
+ // Vector must hold its last indices otherwise wonky shit will happen
// The next parameter index starts at 1, because $r1 already has the value of the first
// parameter.
next_parameter_index = 1;
carry_flag = false;
}
-bool MacroInterpreterImpl::Step(bool is_delay_slot) {
+/// @brief Executes a single macro instruction located at the current program counter. Returns whether
+/// the interpreter should keep running.
+/// @param is_delay_slot Whether the current step is being executed due to a delay slot in a previous instruction.
+bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot) {
u32 base_address = pc;
Macro::Opcode opcode = GetOpcode();
@@ -682,14 +493,12 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
switch (opcode.operation) {
case Macro::Operation::ALU: {
- u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
- GetRegister(opcode.src_b));
- ProcessResult(opcode.result_operation, opcode.dst, result);
+ u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), GetRegister(opcode.src_b));
+ ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result);
break;
}
case Macro::Operation::AddImmediate: {
- ProcessResult(opcode.result_operation, opcode.dst,
- GetRegister(opcode.src_a) + opcode.immediate);
+ ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, GetRegister(opcode.src_a) + opcode.immediate);
break;
}
case Macro::Operation::ExtractInsert: {
@@ -699,7 +508,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask();
dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
dst |= src << opcode.bf_dst_bit;
- ProcessResult(opcode.result_operation, opcode.dst, dst);
+ ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, dst);
break;
}
case Macro::Operation::ExtractShiftLeftImmediate: {
@@ -708,7 +517,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit;
- ProcessResult(opcode.result_operation, opcode.dst, result);
+ ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result);
break;
}
case Macro::Operation::ExtractShiftLeftRegister: {
@@ -717,12 +526,12 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst;
- ProcessResult(opcode.result_operation, opcode.dst, result);
+ ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result);
break;
}
case Macro::Operation::Read: {
- u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
- ProcessResult(opcode.result_operation, opcode.dst, result);
+ u32 result = Read(maxwell3d, GetRegister(opcode.src_a) + opcode.immediate);
+ ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result);
break;
}
case Macro::Operation::Branch: {
@@ -738,7 +547,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
delayed_pc = base_address + opcode.GetBranchTarget();
// Execute one more instruction due to the delay slot.
- return Step(true);
+ return Step(maxwell3d, true);
}
break;
}
@@ -751,13 +560,13 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
// cause an exit if it's executed inside a delay slot.
if (opcode.is_exit && !is_delay_slot) {
// Exit has a delay slot, execute the next instruction
- Step(true);
+ Step(maxwell3d, true);
return false;
}
-
return true;
}
+/// Calculates the result of an ALU operation. src_a OP src_b;
u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) {
switch (operation) {
case Macro::ALUOperation::Add: {
@@ -797,7 +606,8 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a,
}
}
-void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) {
+/// Performs the result operation on the input result and stores it in the specified register (if necessary).
+void MacroInterpreterImpl::ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result) {
switch (operation) {
case Macro::ResultOperation::IgnoreAndFetch:
// Fetch parameter and ignore result.
@@ -815,12 +625,12 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r
case Macro::ResultOperation::FetchAndSend:
// Fetch parameter and send result.
SetRegister(reg, FetchParameter());
- Send(result);
+ Send(maxwell3d, result);
break;
case Macro::ResultOperation::MoveAndSend:
// Move and send result.
SetRegister(reg, result);
- Send(result);
+ Send(maxwell3d, result);
break;
case Macro::ResultOperation::FetchAndSetMethod:
// Fetch parameter and use result as Method Address.
@@ -831,13 +641,13 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r
// Move result and use as Method Address, then fetch and send parameter.
SetRegister(reg, result);
SetMethodAddress(result);
- Send(FetchParameter());
+ Send(maxwell3d, FetchParameter());
break;
case Macro::ResultOperation::MoveAndSetMethodSend:
// Move result and use as Method Address, then send bits 12:17 of result.
SetRegister(reg, result);
SetMethodAddress(result);
- Send((result >> 12) & 0b111111);
+ Send(maxwell3d, (result >> 12) & 0b111111);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
@@ -845,6 +655,7 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r
}
}
+/// Evaluates the branch condition and returns whether the branch should be taken or not.
bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const {
switch (cond) {
case Macro::BranchCondition::Zero:
@@ -855,46 +666,44 @@ bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond,
UNREACHABLE();
}
+/// Reads an opcode at the current program counter location.
Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
ASSERT((pc % sizeof(u32)) == 0);
ASSERT(pc < code.size() * sizeof(u32));
return {code[pc / sizeof(u32)]};
}
+/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
u32 MacroInterpreterImpl::GetRegister(u32 register_id) const {
- return registers.at(register_id);
+ return registers[register_id];
}
+/// Sets the register to the input value.
void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
// Register 0 is hardwired as the zero register.
// Ensure no writes to it actually occur.
- if (register_id == 0) {
+ if (register_id == 0)
return;
- }
-
- registers.at(register_id) = value;
+ registers[register_id] = value;
}
-void MacroInterpreterImpl::SetMethodAddress(u32 address) {
- method_address.raw = address;
-}
-
-void MacroInterpreterImpl::Send(u32 value) {
+/// Calls a GPU Engine method with the input parameter.
+void MacroInterpreterImpl::Send(Engines::Maxwell3D& maxwell3d, u32 value) {
maxwell3d.CallMethod(method_address.address, value, true);
// Increment the method address by the method increment.
- method_address.address.Assign(method_address.address.Value() +
- method_address.increment.Value());
+ method_address.address.Assign(method_address.address.Value() + method_address.increment.Value());
}
-u32 MacroInterpreterImpl::Read(u32 method) const {
+/// Reads a GPU register located at the method address.
+u32 MacroInterpreterImpl::Read(Engines::Maxwell3D& maxwell3d, u32 method) const {
return maxwell3d.GetRegisterValue(method);
}
+/// Returns the next parameter in the parameter queue.
u32 MacroInterpreterImpl::FetchParameter() {
- ASSERT(next_parameter_index < num_parameters);
+ ASSERT(next_parameter_index < parameters.size());
return parameters[next_parameter_index++];
}
-} // Anonymous namespace
#ifdef ARCHITECTURE_x86_64
namespace {
@@ -930,17 +739,15 @@ static const auto default_cg_mode = Xbyak::DontSetProtectRWE;
static const auto default_cg_mode = nullptr; //Allow RWE
#endif
-class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
-public:
- explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_)
+struct MacroJITx64Impl final : public Xbyak::CodeGenerator, public DynamicCachedMacro {
+ explicit MacroJITx64Impl(std::span code_)
: Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode)
- , CachedMacro(maxwell3d_)
, code{code_}
{
Compile();
}
- void Execute(const std::vector& parameters, u32 method) override;
+ void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, u32 method) override;
void Compile_ALU(Macro::Opcode opcode);
void Compile_AddImmediate(Macro::Opcode opcode);
@@ -950,18 +757,13 @@ public:
void Compile_Read(Macro::Opcode opcode);
void Compile_Branch(Macro::Opcode opcode);
-private:
void Optimizer_ScanFlags();
-
void Compile();
bool Compile_NextInstruction();
-
Xbyak::Reg32 Compile_FetchParameter();
Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
-
void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
void Compile_Send(Xbyak::Reg32 value);
-
Macro::Opcode GetOpCode() const;
struct JITState {
@@ -981,21 +783,17 @@ private:
bool enable_asserts{};
};
OptimizerState optimizer{};
-
std::optional next_opcode{};
ProgramType program{nullptr};
-
std::array labels;
std::array delay_skip;
Xbyak::Label end_of_code{};
-
bool is_delay_slot{};
u32 pc{};
-
- const std::vector& code;
+ std::span code;
};
-void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) {
+void MacroJITx64Impl::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, u32 method) {
ASSERT_OR_EXECUTE(program != nullptr, { return; });
JITState state{};
state.maxwell3d = &maxwell3d;
@@ -1231,7 +1029,7 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
-void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
+static void MacroJIT_SendThunk(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
maxwell3d->CallMethod(method_address.address, value, true);
}
@@ -1240,7 +1038,7 @@ void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
mov(Common::X64::ABI_PARAM1, qword[STATE]);
mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS);
mov(Common::X64::ABI_PARAM3.cvt32(), value);
- Common::X64::CallFarFunction(*this, &Send);
+ Common::X64::CallFarFunction(*this, &MacroJIT_SendThunk);
Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
Xbyak::Label dont_process{};
@@ -1452,10 +1250,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
return true;
}
-static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) {
- LOG_CRITICAL(HW_GPU,
- "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)",
- parameter, max_parameter - sizeof(u32));
+static void MacroJIT_ErrorThunk(uintptr_t parameter, uintptr_t max_parameter) {
+ LOG_CRITICAL(HW_GPU, "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)", parameter, max_parameter - sizeof(u32));
}
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
@@ -1465,7 +1261,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, PARAMETERS);
mov(Common::X64::ABI_PARAM2, MAX_PARAMETER);
- Common::X64::CallFarFunction(*this, &WarnInvalidParameter);
+ Common::X64::CallFarFunction(*this, &MacroJIT_ErrorThunk);
Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
L(parameter_ok);
mov(eax, dword[PARAMETERS]);
@@ -1574,33 +1370,42 @@ static void Dump(u64 hash, std::span code, bool decompiled = false) {
macro_file.write(reinterpret_cast(code.data()), code.size_bytes());
}
-MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_, bool is_interpreted_)
- : hle_macros{std::make_optional(maxwell3d_)}
- , maxwell3d{maxwell3d_}
- , is_interpreted{is_interpreted_}
-{}
-
-MacroEngine::~MacroEngine() = default;
-
-void MacroEngine::AddCode(u32 method, u32 data) {
- uploaded_macro_code[method].push_back(data);
-}
-
-void MacroEngine::ClearCode(u32 method) {
- macro_cache.erase(method);
- uploaded_macro_code.erase(method);
-}
-
-void MacroEngine::Execute(u32 method, const std::vector& parameters) {
- auto compiled_macro = macro_cache.find(method);
- if (compiled_macro != macro_cache.end()) {
- const auto& cache_info = compiled_macro->second;
- if (cache_info.has_hle_program) {
- cache_info.hle_program->Execute(parameters, method);
- } else {
- maxwell3d.RefreshParameters();
- cache_info.lle_program->Execute(parameters, method);
- }
+void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span parameters) {
+ auto const execute_variant = [&maxwell3d, ¶meters, method](AnyCachedMacro& acm) {
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if(&acm))
+ a->Execute(maxwell3d, parameters, method);
+ if (auto a = std::get_if>(&acm))
+ a->get()->Execute(maxwell3d, parameters, method);
+ };
+ if (auto const it = macro_cache.find(method); it != macro_cache.end()) {
+ auto& ci = it->second;
+ if (!CanBeHLEProgram(ci.hash) || Settings::values.disable_macro_hle)
+ maxwell3d.RefreshParameters(); //LLE must reload parameters
+ execute_variant(ci.program);
} else {
// Macro not compiled, check if it's uploaded and if so, compile it
std::optional mid_method;
@@ -1617,51 +1422,37 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) {
return;
}
}
- auto& cache_info = macro_cache[method];
-
- if (!mid_method.has_value()) {
- cache_info.lle_program = Compile(macro_code->second);
- cache_info.hash = Common::HashValue(macro_code->second);
- } else {
+ auto& ci = macro_cache[method];
+ if (mid_method) {
const auto& macro_cached = uploaded_macro_code[mid_method.value()];
const auto rebased_method = method - mid_method.value();
auto& code = uploaded_macro_code[method];
code.resize(macro_cached.size() - rebased_method);
std::memcpy(code.data(), macro_cached.data() + rebased_method, code.size() * sizeof(u32));
- cache_info.hash = Common::HashValue(code);
- cache_info.lle_program = Compile(code);
- }
-
- auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
- if (!hle_program || Settings::values.disable_macro_hle) {
- maxwell3d.RefreshParameters();
- cache_info.lle_program->Execute(parameters, method);
+ ci.hash = Common::HashValue(code);
+ ci.program = Compile(maxwell3d, code);
} else {
- cache_info.has_hle_program = true;
- cache_info.hle_program = std::move(hle_program);
- cache_info.hle_program->Execute(parameters, method);
+ ci.program = Compile(maxwell3d, macro_code->second);
+ ci.hash = Common::HashValue(macro_code->second);
}
-
+ if (CanBeHLEProgram(ci.hash) && !Settings::values.disable_macro_hle) {
+ ci.program = GetHLEProgram(ci.hash);
+ } else {
+ maxwell3d.RefreshParameters();
+ }
+ execute_variant(ci.program);
if (Settings::values.dump_macros) {
- Dump(cache_info.hash, macro_code->second, cache_info.has_hle_program);
+ Dump(ci.hash, macro_code->second, !std::holds_alternative(ci.program));
}
}
}
-std::unique_ptr MacroEngine::Compile(const std::vector& code) {
+AnyCachedMacro MacroEngine::Compile(Engines::Maxwell3D& maxwell3d, std::span code) {
#ifdef ARCHITECTURE_x86_64
if (!is_interpreted)
- return std::make_unique(maxwell3d, code);
-#endif
- return std::make_unique(maxwell3d, code);
-}
-
-std::optional GetMacroEngine(Engines::Maxwell3D& maxwell3d) {
-#ifdef ARCHITECTURE_x86_64
- return std::make_optional(maxwell3d, bool(Settings::values.disable_macro_jit));
-#else
- return std::make_optional(maxwell3d, true);
+ return std::make_unique(code);
#endif
+ return MacroInterpreterImpl(code);
}
} // namespace Tegra
diff --git a/src/video_core/macro.h b/src/video_core/macro.h
index 9bdb4219ce..a9a8f2de04 100644
--- a/src/video_core/macro.h
+++ b/src/video_core/macro.h
@@ -7,8 +7,10 @@
#pragma once
#include
-#include