diff --git a/.ci/actool.sh b/.ci/actool.sh new file mode 100755 index 0000000000..5be658d2bb --- /dev/null +++ b/.ci/actool.sh @@ -0,0 +1,22 @@ +#!/bin/sh -e + +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + +# SPDX-FileCopyrightText: Copyright 2026 crueter +# SPDX-License-Identifier: GPL-3.0-or-later + +_svg=dev.eden_emu.eden.svg +_icon=dist/eden.icon +_composed="$_icon/Assets/$_svg" +_svg="dist/$_svg" + +rm "$_composed" +cp "$_svg" "$_composed" + +xcrun actool "$_icon" \ + --compile dist \ + --platform macosx \ + --minimum-deployment-target 11.0 \ + --app-icon eden \ + --output-partial-info-plist /dev/null diff --git a/.github/ISSUE_TEMPLATE/blank_issue_template.yml b/.forgejo/ISSUE_TEMPLATE/blank_issue_template.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/blank_issue_template.yml rename to .forgejo/ISSUE_TEMPLATE/blank_issue_template.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.forgejo/ISSUE_TEMPLATE/bug_report.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/bug_report.yml rename to .forgejo/ISSUE_TEMPLATE/bug_report.yml diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.forgejo/ISSUE_TEMPLATE/config.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/config.yml rename to .forgejo/ISSUE_TEMPLATE/config.yml diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.forgejo/ISSUE_TEMPLATE/feature_request.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/feature_request.yml rename to .forgejo/ISSUE_TEMPLATE/feature_request.yml diff --git a/.github/workflows/license-header.yml b/.forgejo/workflows/license-header.yml similarity index 100% rename from .github/workflows/license-header.yml rename to .forgejo/workflows/license-header.yml diff --git a/.github/workflows/sources.yml b/.forgejo/workflows/sources.yml similarity index 100% rename from .github/workflows/sources.yml rename to .forgejo/workflows/sources.yml diff --git a/.github/workflows/strings.yml b/.forgejo/workflows/strings.yml similarity index 100% rename from .github/workflows/strings.yml rename to .forgejo/workflows/strings.yml diff --git a/.github/workflows/translations.yml b/.forgejo/workflows/translations.yml similarity index 97% rename from .github/workflows/translations.yml rename to .forgejo/workflows/translations.yml index 92bb1fdf5d..16ce4f1808 100644 --- a/.github/workflows/translations.yml +++ b/.forgejo/workflows/translations.yml @@ -3,8 +3,7 @@ name: tx-pull on: # monday, wednesday, saturday at 2pm schedule: - cron: - - '0 14 * * 1,3,6' + cron: '0 14 * * 1,3,6' workflow_dispatch: jobs: @@ -59,4 +58,3 @@ jobs: -H 'Authorization: Bearer ${{ secrets.CI_FJ_TOKEN }}' \ -H 'Content-Type: application/json' \ -d "@data.json" --fail - diff --git a/.patch/mcl/0001-assert-macro.patch b/.patch/mcl/0001-assert-macro.patch deleted file mode 100644 index 2d7d0dd1b0..0000000000 --- a/.patch/mcl/0001-assert-macro.patch +++ /dev/null @@ -1,55 +0,0 @@ -diff --git a/include/mcl/assert.hpp b/include/mcl/assert.hpp -index f77dbe7..9ec0b9c 100644 ---- a/include/mcl/assert.hpp -+++ b/include/mcl/assert.hpp -@@ -23,8 +23,11 @@ template - - } // namespace mcl::detail - -+#ifndef UNREACHABLE - #define UNREACHABLE() ASSERT_FALSE("Unreachable code!") -+#endif - -+#ifndef ASSERT - #define ASSERT(expr) \ - [&] { \ - if (std::is_constant_evaluated()) { \ -@@ -37,7 +40,9 @@ template - } \ - } \ - }() -+#endif - -+#ifndef ASSERT_MSG - #define ASSERT_MSG(expr, ...) \ - [&] { \ - if (std::is_constant_evaluated()) { \ -@@ -50,13 +55,24 @@ template - } \ - } \ - }() -+#endif - -+#ifndef ASSERT_FALSE - #define ASSERT_FALSE(...) ::mcl::detail::assert_terminate("false", __VA_ARGS__) -+#endif - - #if defined(NDEBUG) || defined(MCL_IGNORE_ASSERTS) --# define DEBUG_ASSERT(expr) ASSUME(expr) --# define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr) -+# ifndef DEBUG_ASSERT -+# define DEBUG_ASSERT(expr) ASSUME(expr) -+# endif -+# ifndef DEBUG_ASSERT_MSG -+# define DEBUG_ASSERT_MSG(expr, ...) ASSUME(expr) -+# endif - #else --# define DEBUG_ASSERT(expr) ASSERT(expr) --# define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__) -+# ifndef DEBUG_ASSERT -+# define DEBUG_ASSERT(expr) ASSERT(expr) -+# endif -+# ifndef DEBUG_ASSERT_MSG -+# define DEBUG_ASSERT_MSG(expr, ...) ASSERT_MSG(expr, __VA_ARGS__) -+# endif - #endif diff --git a/CMakeLists.txt b/CMakeLists.txt index 07b2813448..aa25ce9c44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,6 +69,7 @@ endif() # my unity/jumbo build option(ENABLE_UNITY_BUILD "Enable Unity/Jumbo build" OFF) if(MSVC AND ENABLE_UNITY_BUILD) + message(STATUS "Unity build") # Unity builds need big objects for MSVC... add_compile_options(/bigobj) endif() @@ -150,8 +151,8 @@ if (MSVC AND ARCHITECTURE_x86) endif() if (CXX_CLANG_CL) + # clang-cl prints literally 10000+ warnings without this add_compile_options( - # clang-cl prints literally 10000+ warnings without this $<$:-Wno-unused-command-line-argument> $<$:-Wno-unsafe-buffer-usage> $<$:-Wno-unused-value> @@ -161,12 +162,12 @@ if (CXX_CLANG_CL) $<$:-Wno-deprecated-declarations> $<$:-Wno-cast-function-type-mismatch> $<$:/EHsc>) - # REQUIRED CPU features IN Windows-amd64 if (ARCHITECTURE_x86_64) add_compile_options( $<$:-msse4.1> - $<$:-mcx16>) + $<$:-mcx16> + ) endif() endif() @@ -402,13 +403,15 @@ if (Boost_ADDED) if (NOT MSVC OR CXX_CLANG) # boost sucks if (PLATFORM_SUN) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthreads") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthreads") + add_compile_options($<$:-pthreads>) endif() - target_compile_options(boost_heap INTERFACE -Wno-shadow) - target_compile_options(boost_icl INTERFACE -Wno-shadow) - target_compile_options(boost_asio INTERFACE -Wno-conversion -Wno-implicit-fallthrough) + target_compile_options(boost_heap INTERFACE $<$:-Wno-shadow>) + target_compile_options(boost_icl INTERFACE $<$:-Wno-shadow>) + target_compile_options(boost_asio INTERFACE + $<$:-Wno-conversion> + $<$:-Wno-implicit-fallthrough> + ) endif() endif() @@ -447,7 +450,7 @@ if (NOT YUZU_STATIC_ROOM) if (Opus_ADDED) if (MSVC AND CXX_CLANG) target_compile_options(opus PRIVATE - -Wno-implicit-function-declaration + $<$:-Wno-implicit-function-declaration> ) endif() endif() @@ -491,10 +494,10 @@ endfunction() # ============================================= if (APPLE) - # Umbrella framework for everything GUI-related - find_library(COCOA_LIBRARY Cocoa REQUIRED) - find_library(IOKIT_LIBRARY IOKit REQUIRED) - set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY}) + foreach(fw Carbon Metal Cocoa IOKit CoreVideo CoreMedia) + find_library(${fw}_LIBRARY ${fw} REQUIRED) + list(APPEND PLATFORM_LIBRARIES ${${fw}_LIBRARY}) + endforeach() elseif (WIN32) # Target Windows 10 add_compile_definitions(_WIN32_WINNT=0x0A00 WINVER=0x0A00) @@ -531,7 +534,6 @@ if (NOT YUZU_STATIC_ROOM) find_package(SPIRV-Tools) find_package(sirit) find_package(gamemode) - find_package(mcl) find_package(frozen) if (ARCHITECTURE_riscv64) diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 5b0adad8dd..947a4963ee 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -37,10 +37,10 @@ set(GIT_DESC ${BUILD_VERSION}) # Auto-updater metadata! Must somewhat mirror GitHub API endpoint if (NIGHTLY_BUILD) - set(BUILD_AUTO_UPDATE_WEBSITE "https://github.com") - set(BUILD_AUTO_UPDATE_API "api.github.com") - set(BUILD_AUTO_UPDATE_API_PATH "/repos/") - set(BUILD_AUTO_UPDATE_REPO "Eden-CI/Nightly") + set(BUILD_AUTO_UPDATE_WEBSITE "https://git.eden-emu.dev") + set(BUILD_AUTO_UPDATE_API "git.eden-emu.dev") + set(BUILD_AUTO_UPDATE_API_PATH "/api/v1/repos/") + set(BUILD_AUTO_UPDATE_REPO "eden-ci/nightly") set(REPO_NAME "Eden Nightly") else() set(BUILD_AUTO_UPDATE_WEBSITE "https://git.eden-emu.dev") diff --git a/cpmfile.json b/cpmfile.json index c938e67e88..1bb29afae4 100644 --- a/cpmfile.json +++ b/cpmfile.json @@ -87,8 +87,8 @@ "bundled": true }, "llvm-mingw": { - "repo": "misc/llvm-mingw", - "git_host": "git.crueter.xyz", + "repo": "eden-emu/llvm-mingw", + "git_host": "git.eden-emu.dev", "tag": "%VERSION%", "version": "20250828", "artifact": "clang-rt-builtins.tar.zst", diff --git a/dist/Assets.car b/dist/Assets.car index eb54881fac..9ae9bca8a4 100644 Binary files a/dist/Assets.car and b/dist/Assets.car differ diff --git a/dist/dev.eden_emu.eden.svg b/dist/dev.eden_emu.eden.svg index b125f4fb80..7711945aa4 100644 --- a/dist/dev.eden_emu.eden.svg +++ b/dist/dev.eden_emu.eden.svg @@ -1,196 +1,89 @@ + + - - - - - - - Madeline_Dev - mailto:madelvidel@gmail.com - - - 2025 - - 2025 Eden Emulator Project - https://git.eden-emu.dev - - - + xmlns:svg="http://www.w3.org/2000/svg"> + + id="defs1"> + + + + + + + + + + + + + + + diff --git a/dist/eden.bmp b/dist/eden.bmp index 498d6f3893..cffc04b308 100644 Binary files a/dist/eden.bmp and b/dist/eden.bmp differ diff --git a/dist/eden.icns b/dist/eden.icns index 680800951d..82529ba224 100644 Binary files a/dist/eden.icns and b/dist/eden.icns differ diff --git a/dist/eden.ico b/dist/eden.ico index 187013ae63..106742c9ba 100644 Binary files a/dist/eden.ico and b/dist/eden.ico differ diff --git a/dist/eden.icon/Assets/dev.eden_emu.eden.svg b/dist/eden.icon/Assets/dev.eden_emu.eden.svg new file mode 100644 index 0000000000..f88b52f625 --- /dev/null +++ b/dist/eden.icon/Assets/dev.eden_emu.eden.svg @@ -0,0 +1,230 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dist/eden.icon/icon.json b/dist/eden.icon/icon.json new file mode 100644 index 0000000000..1f1e7da516 --- /dev/null +++ b/dist/eden.icon/icon.json @@ -0,0 +1,37 @@ +{ + "fill" : { + "automatic-gradient" : "srgb:0.00000,0.00000,0.00000,1.00000" + }, + "groups" : [ + { + "layers" : [ + { + "fill" : "none", + "image-name" : "dev.eden_emu.eden.svg", + "name" : "dev.eden_emu.eden", + "position" : { + "scale" : 1.8, + "translation-in-points" : [ + 0, + 0 + ] + } + } + ], + "shadow" : { + "kind" : "neutral", + "opacity" : 0.5 + }, + "translucency" : { + "enabled" : true, + "value" : 0.5 + } + } + ], + "supported-platforms" : { + "circles" : [ + "watchOS" + ], + "squares" : "shared" + } +} \ No newline at end of file diff --git a/dist/icon_variations/aprilfools2026.svg b/dist/icon_variations/aprilfools2026.svg new file mode 100644 index 0000000000..7711945aa4 --- /dev/null +++ b/dist/icon_variations/aprilfools2026.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/dist/icon_variations/aprilfools2026_bgcolor b/dist/icon_variations/aprilfools2026_bgcolor new file mode 100644 index 0000000000..fabebfa717 --- /dev/null +++ b/dist/icon_variations/aprilfools2026_bgcolor @@ -0,0 +1 @@ +#43fcfcff diff --git a/dist/qt_themes/default/icons/256x256/eden.png b/dist/qt_themes/default/icons/256x256/eden.png index fbee9f1836..d7286ac4c6 100644 Binary files a/dist/qt_themes/default/icons/256x256/eden.png and b/dist/qt_themes/default/icons/256x256/eden.png differ diff --git a/docs/Deps.md b/docs/Deps.md index fe1f7a14b2..a34e838534 100644 --- a/docs/Deps.md +++ b/docs/Deps.md @@ -76,7 +76,6 @@ Certain other dependencies will be fetched by CPM regardless. System packages *c * This package is known to be broken on the AUR. * [cpp-jwt](https://github.com/arun11299/cpp-jwt) 1.4+ - if `ENABLE_WEB_SERVICE` is on * [unordered-dense](https://github.com/martinus/unordered_dense) -* [mcl](https://github.com/azahar-emu/mcl) - subject to removal On amd64: diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 7b4c481ba5..00bdf10a4f 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -76,9 +76,6 @@ if (ARCHITECTURE_riscv64) AddJsonPackage(biscuit) endif() -# mcl -AddJsonPackage(mcl) - # Vulkan stuff AddDependentPackages(vulkan-headers vulkan-utility-libraries) @@ -109,16 +106,15 @@ if(ENABLE_CUBEB) if (cubeb_ADDED) if (NOT MSVC) if (TARGET speex) - target_compile_options(speex PRIVATE -Wno-sign-compare) + target_compile_options(speex PRIVATE $<$:-Wno-sign-compare>) endif() - set_target_properties(cubeb PROPERTIES COMPILE_OPTIONS "") target_compile_options(cubeb INTERFACE - -Wno-implicit-const-int-float-conversion - -Wno-shadow - -Wno-missing-declarations - -Wno-return-type - -Wno-uninitialized + $<$:-Wno-implicit-const-int-float-conversion> + $<$:-Wno-shadow> + $<$:-Wno-missing-declarations> + $<$:-Wno-return-type> + $<$:-Wno-uninitialized> ) else() target_compile_options(cubeb PRIVATE @@ -184,7 +180,9 @@ if (YUZU_USE_BUNDLED_SIRIT) else() AddJsonPackage(sirit) if(MSVC AND CXX_CLANG) - target_compile_options(siritobj PRIVATE -Wno-error=unused-command-line-argument) + target_compile_options(siritobj PRIVATE + $<$:-Wno-error=unused-command-line-argument> + ) endif() endif() @@ -220,7 +218,7 @@ AddJsonPackage(vulkan-memory-allocator) if (VulkanMemoryAllocator_ADDED) if (CXX_CLANG) target_compile_options(VulkanMemoryAllocator INTERFACE - -Wno-unused-variable + $<$:-Wno-unused-variable> ) elseif(MSVC) target_compile_options(VulkanMemoryAllocator INTERFACE diff --git a/externals/cpmfile.json b/externals/cpmfile.json index 8209e431a3..f849426a4d 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ -208,18 +208,6 @@ "version": "0.9.1", "git_version": "0.19.0" }, - "mcl": { - "version": "0.1.12", - "repo": "azahar-emu/mcl", - "sha": "7b08d83418", - "hash": "9c6ba624cb22ef622f78046a82abb99bf5026284ba17dfacaf46ac842cbd3b0f515f5ba45a1598c7671318a78a2e648db72ce8d10e7537f34e39800bdcb57694", - "options": [ - "MCL_INSTALL OFF" - ], - "patches": [ - "0001-assert-macro.patch" - ] - }, "libusb": { "repo": "libusb/libusb", "tag": "v%VERSION%", @@ -246,12 +234,13 @@ }, "tzdb": { "package": "nx_tzdb", - "repo": "misc/tzdb_to_nx", - "git_host": "git.crueter.xyz", + "repo": "eden-emu/tzdb_to_nx", + "git_host": "git.eden-emu.dev", "artifact": "%VERSION%.tar.gz", "tag": "%VERSION%", - "hash": "dc37a189a44ce8b5c988ca550582431a6c7eadfd3c6e709bee6277116ee803e714333e85c9e6cbb5c69346a14d6f2cc7ed96e8aa09cc5fb8a89f945059651db6", - "version": "121125" + "hash": "cce65a12bf90f4ead43b24a0b95dfad77ac3d9bfbaaf66c55e6701346e7a1e44ca5d2f23f47ee35ee02271eb1082bf1762af207aad9fb236f1c8476812d008ed", + "version": "121125", + "git_version": "230326" }, "vulkan-headers": { "repo": "KhronosGroup/Vulkan-Headers", diff --git a/externals/libusb/CMakeLists.txt b/externals/libusb/CMakeLists.txt index 7eae81d3b6..47b54f43cc 100644 --- a/externals/libusb/CMakeLists.txt +++ b/externals/libusb/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project # SPDX-License-Identifier: GPL-3.0-or-later # SPDX-FileCopyrightText: 2020 yuzu Emulator Project @@ -232,7 +232,7 @@ else() # MINGW OR (${CMAKE_SYSTEM_NAME} MATCHES "Linux") ) find_package(Threads REQUIRED) if(THREADS_HAVE_PTHREAD_ARG) - target_compile_options(usb PUBLIC "-pthread") + target_compile_options(usb PUBLIC $<$:-pthread>) endif() if(CMAKE_THREAD_LIBS_INIT) target_link_libraries(usb PRIVATE "${CMAKE_THREAD_LIBS_INIT}") diff --git a/externals/renderdoc/renderdoc_app.h b/externals/renderdoc/renderdoc_app.h index e6c1511deb..c379f0ac40 100644 --- a/externals/renderdoc/renderdoc_app.h +++ b/externals/renderdoc/renderdoc_app.h @@ -7,7 +7,7 @@ /****************************************************************************** * The MIT License (MIT) * - * Copyright (c) 2019-2025 Baldur Karlsson + * Copyright (c) 2015-2026 Baldur Karlsson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -72,6 +72,10 @@ extern "C" { // truncated version when only a uint64_t is available (e.g. Vulkan tags): #define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL +// this is a magic value for vulkan user tags to indicate which dispatchable API objects are which +// for object annotations +#define RENDERDOC_APIObjectAnnotationHelper 0xfbb3b337b664d0adULL + ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc capture options // @@ -564,6 +568,128 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DeviceP // multiple times only the last title will be used. typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); +// Annotations API: +// +// These functions allow you to specify annotations either on a per-command level, or a per-object +// level. +// +// Basic types of annotations are supported, as well as vector versions and references to API objects. +// +// The annotations are stored as keys, with the key being a dot-separated path allowing arbitrary +// nesting and user organisation. The keys are sorted in human order so `foo.2.bar` will be displayed +// before `foo.10.bar` to allow creation of arrays if desired. +// +// Deleting an annotation can be done by assigning an empty value to it. + +// the type of an annotation value, or Empty to delete an annotation +typedef enum RENDERDOC_AnnotationType +{ + eRENDERDOC_Empty, + eRENDERDOC_Bool, + eRENDERDOC_Int32, + eRENDERDOC_UInt32, + eRENDERDOC_Int64, + eRENDERDOC_UInt64, + eRENDERDOC_Float, + eRENDERDOC_Double, + eRENDERDOC_String, + eRENDERDOC_APIObject, + eRENDERDOC_AnnotationMax = 0x7FFFFFFF, +} RENDERDOC_AnnotationType; + +// a union with vector annotation value data +typedef union RENDERDOC_AnnotationVectorValue +{ + bool boolean[4]; + int32_t int32[4]; + int64_t int64[4]; + uint32_t uint32[4]; + uint64_t uint64[4]; + float float32[4]; + double float64[4]; +} RENDERDOC_AnnotationVectorValue; + +// a union with scalar annotation value data +typedef union RENDERDOC_AnnotationValue +{ + bool boolean; + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + float float32; + double float64; + + RENDERDOC_AnnotationVectorValue vector; + + const char *string; + void *apiObject; +} RENDERDOC_AnnotationValue; + +// a struct for specifying a GL object, as we don't have pointers we can use so instead we specify a +// pointer to this struct giving both the type and the name +typedef struct RENDERDOC_GLResourceReference +{ + // this is the same GLenum identifier as passed to glObjectLabel + uint32_t identifier; + uint32_t name; +} GLResourceReference; + +// simple C++ helpers to avoid the need for a temporary objects for value passing and GL object specification +#ifdef __cplusplus +struct RDGLObjectHelper +{ + RENDERDOC_GLResourceReference gl; + + RDGLObjectHelper(uint32_t identifier, uint32_t name) + { + gl.identifier = identifier; + gl.name = name; + } + + operator RENDERDOC_GLResourceReference *() { return ≷ } +}; + +struct RDAnnotationHelper +{ + RENDERDOC_AnnotationValue val; + + RDAnnotationHelper(bool b) { val.boolean = b; } + RDAnnotationHelper(int32_t i) { val.int32 = i; } + RDAnnotationHelper(int64_t i) { val.int64 = i; } + RDAnnotationHelper(uint32_t i) { val.uint32 = i; } + RDAnnotationHelper(uint64_t i) { val.uint64 = i; } + RDAnnotationHelper(float f) { val.float32 = f; } + RDAnnotationHelper(double d) { val.float64 = d; } + RDAnnotationHelper(const char *s) { val.string = s; } + + operator RENDERDOC_AnnotationValue *() { return &val; } +}; +#endif + +// The device is specified in the same way as other API calls that take a RENDERDOC_DevicePointer +// to specify the device. +// +// The object or queue/commandbuffer will depend on the graphics API in question. +// +// Return value: +// 0 - The annotation was applied successfully. +// 1 - The device is unknown/invalid +// 2 - The device is valid but the annotation is not supported for API-specific reasons, such as an +// unrecognised or invalid object or queue/commandbuffer +// 3 - The call is ill-formed or invalid e.g. empty is specified with a value pointer, or non-empty +// is specified with a NULL value pointer +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_SetObjectAnnotation)(RENDERDOC_DevicePointer device, + void *object, const char *key, + RENDERDOC_AnnotationType valueType, + uint32_t valueVectorWidth, + const RENDERDOC_AnnotationValue *value); + +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_SetCommandAnnotation)( + RENDERDOC_DevicePointer device, void *queueOrCommandBuffer, const char *key, + RENDERDOC_AnnotationType valueType, uint32_t valueVectorWidth, + const RENDERDOC_AnnotationValue *value); + ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc API versions // @@ -592,6 +718,7 @@ typedef enum RENDERDOC_Version eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 + eRENDERDOC_API_Version_1_7_0 = 10700, // RENDERDOC_API_1_7_0 = 1 07 00 } RENDERDOC_Version; // API version changelog: @@ -622,8 +749,10 @@ typedef enum RENDERDOC_Version // 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected // 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a // capture made with StartFrameCapture() or EndFrameCapture() +// 1.7.0 - Added feature: SetObjectAnnotation() / SetCommandAnnotation() for adding rich +// annotations to objects and command streams -typedef struct RENDERDOC_API_1_6_0 +typedef struct RENDERDOC_API_1_7_0 { pRENDERDOC_GetAPIVersion GetAPIVersion; @@ -701,20 +830,25 @@ typedef struct RENDERDOC_API_1_6_0 // new function in 1.6.0 pRENDERDOC_SetCaptureTitle SetCaptureTitle; -} RENDERDOC_API_1_6_0; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; -typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; + // new functions in 1.7.0 + pRENDERDOC_SetObjectAnnotation SetObjectAnnotation; + pRENDERDOC_SetCommandAnnotation SetCommandAnnotation; +} RENDERDOC_API_1_7_0; + +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_5_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_6_0; ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc API entry point diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 482fb45c6a..c184d3b493 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,7 +9,7 @@ include_directories(.) if (ENABLE_UNITY_BUILD) set(CMAKE_UNITY_BUILD ON) - set(CMAKE_UNITY_BUILD_BATCH_SIZE 5000) + set(CMAKE_UNITY_BUILD_BATCH_SIZE 100) endif() # Dynarmic @@ -123,47 +123,42 @@ if (MSVC AND NOT CXX_CLANG) else() if (NOT MSVC) add_compile_options( - -fwrapv - -fno-rtti # Disable RTTI - -pipe - ) + $<$:-fwrapv> + $<$:-pipe> + # Disable RTTI (C++ only) + $<$:-fno-rtti>) endif() add_compile_options( - -Werror=all - -Werror=extra - -Werror=missing-declarations - -Werror=shadow - -Werror=unused - - -Wno-attributes - -Wno-invalid-offsetof - -Wno-unused-parameter - -Wno-missing-field-initializers - ) + $<$:-Werror=all> + $<$:-Werror=extra> + $<$:-Werror=missing-declarations> + $<$:-Werror=shadow> + $<$:-Werror=unused> + $<$:-Wno-attributes> + $<$:-Wno-invalid-offsetof> + $<$:-Wno-unused-parameter> + $<$:-Wno-missing-field-initializers>) if (CXX_CLANG OR CXX_ICC OR CXX_APPLE) # Clang, AppleClang, or Intel C++ if (NOT MSVC) add_compile_options( - -Werror=shadow-uncaptured-local - -Werror=implicit-fallthrough - -Werror=type-limits - ) + $<$:-Werror=shadow-uncaptured-local> + $<$:-Werror=implicit-fallthrough> + $<$:-Werror=type-limits>) endif() - add_compile_options( - -Wno-braced-scalar-init - -Wno-unused-private-field - -Wno-nullability-completeness - ) + $<$:-Wno-braced-scalar-init> + $<$:-Wno-unused-private-field> + $<$:-Wno-nullability-completeness>) endif() if (ARCHITECTURE_x86_64) - add_compile_options("-mcx16") + add_compile_options(-mcx16) endif() if (APPLE AND CXX_CLANG) - add_compile_options("-stdlib=libc++") + add_compile_options($<$:-stdlib=libc++>) endif() # GCC bugs @@ -171,10 +166,9 @@ else() # These diagnostics would be great if they worked, but are just completely broken # and produce bogus errors on external libraries like fmt. add_compile_options( - -Wno-array-bounds - -Wno-stringop-overread - -Wno-stringop-overflow - ) + $<$:-Wno-array-bounds> + $<$:-Wno-stringop-overread> + $<$:-Wno-stringop-overflow>) endif() # Set file offset size to 64 bits. diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt index 2764d7eac6..44290fd4b6 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt @@ -25,6 +25,11 @@ import android.hardware.SensorEventListener import android.hardware.SensorManager import android.os.Build import android.os.Bundle +import android.os.Handler +import android.os.Looper +import androidx.navigation.NavOptions +import org.yuzu.yuzu_emu.fragments.EmulationFragment +import org.yuzu.yuzu_emu.utils.CustomSettingsHandler import android.util.Rational import android.view.InputDevice import android.view.KeyEvent @@ -87,6 +92,28 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager private val emulationViewModel: EmulationViewModel by viewModels() private var foregroundService: Intent? = null + private val mainHandler = Handler(Looper.getMainLooper()) + private var pendingRomSwapIntent: Intent? = null + private var isWaitingForRomSwapStop = false + private var romSwapNativeStopped = false + private var romSwapThreadStopped = false + private var romSwapGeneration = 0 + private var hasEmulationSession = processHasEmulationSession + private val romSwapStopTimeoutRunnable = Runnable { onRomSwapStopTimeout() } + + private fun onRomSwapStopTimeout() { + if (!isWaitingForRomSwapStop) { + return + } + Log.warning("[EmulationActivity] ROM swap stop timed out; retrying native stop and continuing to wait") + NativeLibrary.stopEmulation() + scheduleRomSwapStopTimeout() + } + + private fun scheduleRomSwapStopTimeout() { + mainHandler.removeCallbacks(romSwapStopTimeoutRunnable) + mainHandler.postDelayed(romSwapStopTimeoutRunnable, ROM_SWAP_STOP_TIMEOUT_MS) + } override fun attachBaseContext(base: Context) { super.attachBaseContext(YuzuApplication.applyLanguage(base)) @@ -128,9 +155,29 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager binding = ActivityEmulationBinding.inflate(layoutInflater) setContentView(binding.root) + val launchIntent = Intent(intent) + val shouldDeferLaunchForSwap = hasEmulationSession && isSwapIntent(launchIntent) + if (shouldDeferLaunchForSwap) { + Log.info("[EmulationActivity] onCreate detected existing session; deferring new game setup for swap") + emulationViewModel.setIsEmulationStopping(true) + emulationViewModel.setEmulationStopped(false) + } + val navHostFragment = supportFragmentManager.findFragmentById(R.id.fragment_container) as NavHostFragment - navHostFragment.navController.setGraph(R.navigation.emulation_navigation, intent.extras) + val initialArgs = if (shouldDeferLaunchForSwap) { + Bundle(intent.extras ?: Bundle()).apply { + processSessionGame?.let { putParcelable("game", it) } + } + } else { + intent.extras + } + navHostFragment.navController.setGraph(R.navigation.emulation_navigation, initialArgs) + if (shouldDeferLaunchForSwap) { + mainHandler.post { + handleSwapIntent(launchIntent) + } + } isActivityRecreated = savedInstanceState != null @@ -210,6 +257,7 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager } override fun onDestroy() { + mainHandler.removeCallbacks(romSwapStopTimeoutRunnable) super.onDestroy() inputManager.unregisterInputDeviceListener(this) stopForegroundService(this) @@ -228,17 +276,123 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager override fun onNewIntent(intent: Intent) { super.onNewIntent(intent) - setIntent(intent) - - // Reset navigation graph with new intent data to recreate EmulationFragment - val navHostFragment = - supportFragmentManager.findFragmentById(R.id.fragment_container) as NavHostFragment - navHostFragment.navController.setGraph(R.navigation.emulation_navigation, intent.extras) - + handleSwapIntent(intent) nfcReader.onNewIntent(intent) InputHandler.updateControllerData() } + private fun isSwapIntent(intent: Intent): Boolean { + return when { + intent.getBooleanExtra(EXTRA_OVERLAY_GAMELESS_EDIT_MODE, false) -> false + intent.action == CustomSettingsHandler.CUSTOM_CONFIG_ACTION -> true + intent.data != null -> true + else -> { + val extras = intent.extras + extras != null && + BundleCompat.getParcelable(extras, EXTRA_SELECTED_GAME, Game::class.java) != null + } + } + } + + private fun handleSwapIntent(intent: Intent) { + if (!isSwapIntent(intent)) { + return + } + + pendingRomSwapIntent = Intent(intent) + + if (!isWaitingForRomSwapStop) { + Log.info("[EmulationActivity] Begin ROM swap: data=${intent.data}") + isWaitingForRomSwapStop = true + romSwapNativeStopped = false + romSwapThreadStopped = false + romSwapGeneration += 1 + val thisSwapGeneration = romSwapGeneration + emulationViewModel.setIsEmulationStopping(true) + emulationViewModel.setEmulationStopped(false) + val navHostFragment = + supportFragmentManager.findFragmentById(R.id.fragment_container) as? NavHostFragment + val childFragmentManager = navHostFragment?.childFragmentManager + val stoppingFragmentForSwap = + (childFragmentManager?.primaryNavigationFragment as? EmulationFragment) ?: + childFragmentManager + ?.fragments + ?.asReversed() + ?.firstOrNull { + it is EmulationFragment && + it.isAdded && + it.view != null && + !it.isRemoving + } as? EmulationFragment + + val hasSessionForSwap = hasEmulationSession || stoppingFragmentForSwap != null + + if (!hasSessionForSwap) { + romSwapNativeStopped = true + romSwapThreadStopped = true + } else { + if (stoppingFragmentForSwap != null) { + stoppingFragmentForSwap.stopForRomSwap() + stoppingFragmentForSwap.notifyWhenEmulationThreadStops { + if (!isWaitingForRomSwapStop || romSwapGeneration != thisSwapGeneration) { + return@notifyWhenEmulationThreadStops + } + romSwapThreadStopped = true + Log.info("[EmulationActivity] ROM swap thread stop acknowledged") + launchPendingRomSwap(force = false) + } + } else { + Log.warning("[EmulationActivity] ROM swap stop target fragment not found; requesting native stop") + romSwapThreadStopped = true + NativeLibrary.stopEmulation() + } + + scheduleRomSwapStopTimeout() + } + } + + launchPendingRomSwap(force = false) + } + + private fun launchPendingRomSwap(force: Boolean) { + if (!isWaitingForRomSwapStop) { + return + } + if (!force && (!romSwapNativeStopped || !romSwapThreadStopped)) { + return + } + val swapIntent = pendingRomSwapIntent ?: return + Log.info("[EmulationActivity] Launching pending ROM swap: data=${swapIntent.data}") + pendingRomSwapIntent = null + isWaitingForRomSwapStop = false + romSwapNativeStopped = false + romSwapThreadStopped = false + mainHandler.removeCallbacks(romSwapStopTimeoutRunnable) + applyGameLaunchIntent(swapIntent) + } + + private fun applyGameLaunchIntent(intent: Intent) { + hasEmulationSession = true + processHasEmulationSession = true + emulationViewModel.setIsEmulationStopping(false) + emulationViewModel.setEmulationStopped(false) + setIntent(Intent(intent)) + val navHostFragment = + supportFragmentManager.findFragmentById(R.id.fragment_container) as NavHostFragment + val navController = navHostFragment.navController + val startArgs = intent.extras?.let { Bundle(it) } ?: Bundle() + val navOptions = NavOptions.Builder() + .setPopUpTo(R.id.emulationFragment, true) + .build() + + runCatching { + navController.navigate(R.id.emulationFragment, startArgs, navOptions) + }.onFailure { + Log.warning("[EmulationActivity] ROM swap navigate fallback to setGraph: ${it.message}") + navController.setGraph(R.navigation.emulation_navigation, startArgs) + } + } + override fun dispatchKeyEvent(event: KeyEvent): Boolean { if (event.keyCode == KeyEvent.KEYCODE_VOLUME_UP || @@ -608,19 +762,48 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager } fun onEmulationStarted() { + if (Looper.myLooper() != Looper.getMainLooper()) { + mainHandler.post { onEmulationStarted() } + return + } + hasEmulationSession = true + processHasEmulationSession = true emulationViewModel.setEmulationStarted(true) + emulationViewModel.setIsEmulationStopping(false) + emulationViewModel.setEmulationStopped(false) NativeLibrary.playTimeManagerStart() } fun onEmulationStopped(status: Int) { - if (status == 0 && emulationViewModel.programChanged.value == -1) { + if (Looper.myLooper() != Looper.getMainLooper()) { + mainHandler.post { onEmulationStopped(status) } + return + } + hasEmulationSession = false + processHasEmulationSession = false + if (isWaitingForRomSwapStop) { + romSwapNativeStopped = true + Log.info("[EmulationActivity] ROM swap native stop acknowledged") + launchPendingRomSwap(force = false) + } else if (status == 0 && emulationViewModel.programChanged.value == -1) { + processSessionGame = null finish() + } else if (!isWaitingForRomSwapStop) { + processSessionGame = null } emulationViewModel.setEmulationStopped(true) } + fun updateSessionGame(game: Game?) { + processSessionGame = game + } + fun onProgramChanged(programIndex: Int) { + if (Looper.myLooper() != Looper.getMainLooper()) { + mainHandler.post { onProgramChanged(programIndex) } + return + } emulationViewModel.setProgramChanged(programIndex) } @@ -644,6 +827,11 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager companion object { const val EXTRA_SELECTED_GAME = "SelectedGame" const val EXTRA_OVERLAY_GAMELESS_EDIT_MODE = "overlayGamelessEditMode" + private const val ROM_SWAP_STOP_TIMEOUT_MS = 5000L + @Volatile + private var processHasEmulationSession = false + @Volatile + private var processSessionGame: Game? = null fun stopForegroundService(activity: Activity) { val startIntent = Intent(activity, ForegroundService::class.java) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/ChatDialog.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/ChatDialog.kt index 5d6679bd28..431125ca8e 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/ChatDialog.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/ChatDialog.kt @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later package org.yuzu.yuzu_emu.dialogs @@ -20,6 +20,8 @@ import org.yuzu.yuzu_emu.databinding.DialogChatBinding import org.yuzu.yuzu_emu.databinding.ItemChatMessageBinding import org.yuzu.yuzu_emu.features.settings.model.StringSetting import org.yuzu.yuzu_emu.network.NetPlayManager +import org.yuzu.yuzu_emu.utils.CompatUtils +import org.yuzu.yuzu_emu.utils.FullscreenHelper import java.text.SimpleDateFormat import java.util.* @@ -34,6 +36,13 @@ class ChatDialog(context: Context) : BottomSheetDialog(context) { private lateinit var binding: DialogChatBinding private lateinit var chatAdapter: ChatAdapter private val handler = Handler(Looper.getMainLooper()) + private val hideSystemBars: Boolean by lazy { + runCatching { + FullscreenHelper.shouldHideSystemBars(CompatUtils.findActivity(context)) + }.getOrElse { + FullscreenHelper.isFullscreenEnabled(context) + } + } // TODO(alekpop, crueter): Top drawer for message notifications, perhaps use system notifs? // TODO(alekpop, crueter): Context menu actions for chat users @@ -41,6 +50,7 @@ class ChatDialog(context: Context) : BottomSheetDialog(context) { @SuppressLint("NotifyDataSetChanged") override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) + setOnShowListener { applyFullscreenMode() } binding = DialogChatBinding.inflate(LayoutInflater.from(context)) setContentView(binding.root) @@ -75,6 +85,11 @@ class ChatDialog(context: Context) : BottomSheetDialog(context) { } } + override fun onStart() { + super.onStart() + applyFullscreenMode() + } + override fun dismiss() { NetPlayManager.setChatOpen(false) super.dismiss() @@ -108,6 +123,12 @@ class ChatDialog(context: Context) : BottomSheetDialog(context) { private fun scrollToBottom() { binding.chatRecyclerView.scrollToPosition(chatAdapter.itemCount - 1) } + + private fun applyFullscreenMode() { + window?.let { window -> + FullscreenHelper.applyToWindow(window, hideSystemBars) + } + } } class ChatAdapter(private val messages: List) : diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/LobbyBrowser.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/LobbyBrowser.kt index 6d5c6ef23f..10ff2da6c7 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/LobbyBrowser.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/LobbyBrowser.kt @@ -31,15 +31,25 @@ import org.yuzu.yuzu_emu.databinding.DialogLobbyBrowserBinding import org.yuzu.yuzu_emu.databinding.ItemLobbyRoomBinding import org.yuzu.yuzu_emu.features.settings.model.StringSetting import org.yuzu.yuzu_emu.network.NetPlayManager +import org.yuzu.yuzu_emu.utils.CompatUtils +import org.yuzu.yuzu_emu.utils.FullscreenHelper import java.util.Locale class LobbyBrowser(context: Context) : BottomSheetDialog(context) { private lateinit var binding: DialogLobbyBrowserBinding private lateinit var adapter: LobbyRoomAdapter private val handler = Handler(Looper.getMainLooper()) + private val hideSystemBars: Boolean by lazy { + runCatching { + FullscreenHelper.shouldHideSystemBars(CompatUtils.findActivity(context)) + }.getOrElse { + FullscreenHelper.isFullscreenEnabled(context) + } + } override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) + setOnShowListener { applyFullscreenMode() } behavior.state = BottomSheetBehavior.STATE_EXPANDED behavior.skipCollapsed = @@ -81,6 +91,7 @@ class LobbyBrowser(context: Context) : BottomSheetDialog(context) { behavior.expandedOffset = 0 behavior.skipCollapsed = true behavior.state = BottomSheetBehavior.STATE_EXPANDED + applyFullscreenMode() } private fun setupRecyclerView() { @@ -274,4 +285,10 @@ class LobbyBrowser(context: Context) : BottomSheetDialog(context) { } private inner class ScoreItem(val score: Double, val item: NetPlayManager.RoomInfo) + + private fun applyFullscreenMode() { + window?.let { window -> + FullscreenHelper.applyToWindow(window, hideSystemBars) + } + } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/NetPlayDialog.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/NetPlayDialog.kt index 73452b4b69..45ce5fb0cf 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/NetPlayDialog.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/dialogs/NetPlayDialog.kt @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later package org.yuzu.yuzu_emu.dialogs @@ -36,6 +36,7 @@ import org.yuzu.yuzu_emu.features.settings.model.StringSetting import org.yuzu.yuzu_emu.network.NetDataValidators import org.yuzu.yuzu_emu.network.NetPlayManager import org.yuzu.yuzu_emu.utils.CompatUtils +import org.yuzu.yuzu_emu.utils.FullscreenHelper import org.yuzu.yuzu_emu.utils.GameHelper class NetPlayDialog(context: Context) : BottomSheetDialog(context) { @@ -43,9 +44,17 @@ class NetPlayDialog(context: Context) : BottomSheetDialog(context) { private val gameNameList: MutableList> = mutableListOf() private val gameIdList: MutableList> = mutableListOf() + private val hideSystemBars: Boolean by lazy { + runCatching { + FullscreenHelper.shouldHideSystemBars(CompatUtils.findActivity(context)) + }.getOrElse { + FullscreenHelper.isFullscreenEnabled(context) + } + } override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) + setOnShowListener { applyFullscreenMode() } behavior.state = BottomSheetBehavior.STATE_EXPANDED behavior.state = BottomSheetBehavior.STATE_EXPANDED @@ -118,6 +127,11 @@ class NetPlayDialog(context: Context) : BottomSheetDialog(context) { } } + override fun onStart() { + super.onStart() + applyFullscreenMode() + } + data class NetPlayItems( val option: Int, val name: String, @@ -352,6 +366,11 @@ class NetPlayDialog(context: Context) : BottomSheetDialog(context) { TextValidatorWatcher.validStates.clear() val activity = CompatUtils.findActivity(context) val dialog = BottomSheetDialog(activity) + dialog.setOnShowListener { + dialog.window?.let { window -> + FullscreenHelper.applyToWindow(window, hideSystemBars) + } + } dialog.behavior.state = BottomSheetBehavior.STATE_EXPANDED dialog.behavior.state = BottomSheetBehavior.STATE_EXPANDED @@ -582,6 +601,12 @@ class NetPlayDialog(context: Context) : BottomSheetDialog(context) { dialog.show() } + private fun applyFullscreenMode() { + window?.let { window -> + FullscreenHelper.applyToWindow(window, hideSystemBars) + } + } + private fun showModerationDialog() { val activity = CompatUtils.findActivity(context) val dialog = MaterialAlertDialogBuilder(activity) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt index 0f89533d8e..b438812d58 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt @@ -104,6 +104,8 @@ object Settings { const val PREF_THEME_MODE = "ThemeMode" const val PREF_BLACK_BACKGROUNDS = "BlackBackgrounds" const val PREF_STATIC_THEME_COLOR = "StaticThemeColor" + const val PREF_APP_FULLSCREEN = "AppFullscreen" + const val APP_FULLSCREEN_DEFAULT = false enum class EmulationOrientation(val int: Int) { Unspecified(0), diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index a20858f0a9..6f25856cbf 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -670,15 +670,6 @@ abstract class SettingsItem( valuesId = R.array.dmaAccuracyValues ) ) - put( - SingleChoiceSetting( - IntSetting.FRAME_PACING_MODE, - titleId = R.string.frame_pacing_mode, - descriptionId = R.string.frame_pacing_mode_description, - choicesId = R.array.framePacingModeNames, - valuesId = R.array.framePacingModeValues - ) - ) put( SwitchSetting( BooleanSetting.RENDERER_ASYNCHRONOUS_SHADERS, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsActivity.kt index d33bbc3d7d..ad1ecba64c 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsActivity.kt @@ -103,6 +103,7 @@ class SettingsActivity : AppCompatActivity() { ) setInsets() + applyFullscreenPreference() } fun navigateBack() { @@ -122,6 +123,18 @@ class SettingsActivity : AppCompatActivity() { } } + override fun onResume() { + super.onResume() + applyFullscreenPreference() + } + + override fun onWindowFocusChanged(hasFocus: Boolean) { + super.onWindowFocusChanged(hasFocus) + if (hasFocus) { + applyFullscreenPreference() + } + } + override fun onStop() { super.onStop() Log.info("[SettingsActivity] Settings activity stopping. Saving settings to INI...") @@ -188,4 +201,8 @@ class SettingsActivity : AppCompatActivity() { windowInsets } } + + private fun applyFullscreenPreference() { + FullscreenHelper.applyToActivity(this) + } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index ff25584c92..060a6fe9ae 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -29,6 +29,8 @@ import org.yuzu.yuzu_emu.features.settings.model.view.* import org.yuzu.yuzu_emu.utils.InputHandler import org.yuzu.yuzu_emu.utils.NativeConfig import org.yuzu.yuzu_emu.utils.DirectoryInitialization +import org.yuzu.yuzu_emu.utils.FullscreenHelper +import androidx.core.content.edit import androidx.fragment.app.FragmentActivity import org.yuzu.yuzu_emu.fragments.MessageDialogFragment @@ -254,7 +256,6 @@ class SettingsFragmentPresenter( add(IntSetting.RENDERER_ACCURACY.key) add(IntSetting.DMA_ACCURACY.key) - add(IntSetting.FRAME_PACING_MODE.key) add(IntSetting.MAX_ANISOTROPY.key) add(IntSetting.RENDERER_VRAM_USAGE_MODE.key) add(IntSetting.RENDERER_ASTC_DECODE_METHOD.key) @@ -1187,6 +1188,39 @@ class SettingsFragmentPresenter( ) ) + val fullscreenSetting: AbstractBooleanSetting = object : AbstractBooleanSetting { + override fun getBoolean(needsGlobal: Boolean): Boolean = + FullscreenHelper.isFullscreenEnabled(context) + + override fun setBoolean(value: Boolean) { + FullscreenHelper.setFullscreenEnabled(context, value) + settingsViewModel.setShouldRecreate(true) + } + + override val key: String = Settings.PREF_APP_FULLSCREEN + override val isRuntimeModifiable: Boolean = true + override val pairedSettingKey: String = "" + override val isSwitchable: Boolean = false + override var global: Boolean = true + override val isSaveable: Boolean = true + override val defaultValue: Boolean = Settings.APP_FULLSCREEN_DEFAULT + + override fun getValueAsString(needsGlobal: Boolean): String = + getBoolean(needsGlobal).toString() + + override fun reset() { + setBoolean(defaultValue) + } + } + + add( + SwitchSetting( + fullscreenSetting, + titleId = R.string.fullscreen_mode, + descriptionId = R.string.fullscreen_mode_description + ) + ) + add(HeaderSetting(R.string.buttons)) add(BooleanSetting.ENABLE_FOLDER_BUTTON.key) add(BooleanSetting.ENABLE_QLAUNCH_BUTTON.key) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsSubscreenActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsSubscreenActivity.kt index 11ecd355fb..91888dce12 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsSubscreenActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsSubscreenActivity.kt @@ -20,6 +20,7 @@ import org.yuzu.yuzu_emu.R import org.yuzu.yuzu_emu.YuzuApplication import org.yuzu.yuzu_emu.databinding.ActivitySettingsBinding import org.yuzu.yuzu_emu.utils.DirectoryInitialization +import org.yuzu.yuzu_emu.utils.FullscreenHelper import org.yuzu.yuzu_emu.utils.InsetsHelper import org.yuzu.yuzu_emu.utils.ThemeHelper @@ -89,6 +90,7 @@ class SettingsSubscreenActivity : AppCompatActivity() { ) setInsets() + applyFullscreenPreference() } override fun onStart() { @@ -98,6 +100,18 @@ class SettingsSubscreenActivity : AppCompatActivity() { } } + override fun onResume() { + super.onResume() + applyFullscreenPreference() + } + + override fun onWindowFocusChanged(hasFocus: Boolean) { + super.onWindowFocusChanged(hasFocus) + if (hasFocus) { + applyFullscreenPreference() + } + } + fun navigateBack() { val navHostFragment = supportFragmentManager.findFragmentById(R.id.fragment_container) as NavHostFragment @@ -149,4 +163,8 @@ class SettingsSubscreenActivity : AppCompatActivity() { windowInsets } } + + private fun applyFullscreenPreference() { + FullscreenHelper.applyToActivity(this) + } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/AddonsFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/AddonsFragment.kt index b20d75ef0a..96632b4606 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/AddonsFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/AddonsFragment.kt @@ -25,6 +25,7 @@ import org.yuzu.yuzu_emu.model.AddonViewModel import org.yuzu.yuzu_emu.model.HomeViewModel import org.yuzu.yuzu_emu.utils.AddonUtil import org.yuzu.yuzu_emu.utils.FileUtil.copyFilesTo +import org.yuzu.yuzu_emu.utils.InstallableActions import org.yuzu.yuzu_emu.utils.ViewUtils.updateMargins import org.yuzu.yuzu_emu.utils.collect import java.io.File @@ -107,6 +108,12 @@ class AddonsFragment : Fragment() { ).show(parentFragmentManager, MessageDialogFragment.TAG) } } + parentFragmentManager.setFragmentResultListener( + ContentTypeSelectionDialogFragment.REQUEST_INSTALL_GAME_UPDATE, + viewLifecycleOwner + ) { _, _ -> + installGameUpdate.launch(arrayOf("*/*")) + } binding.buttonInstall.setOnClickListener { ContentTypeSelectionDialogFragment().show( @@ -130,7 +137,7 @@ class AddonsFragment : Fragment() { super.onDestroy() } - val installAddon = + private val installAddon = registerForActivityResult(ActivityResultContracts.OpenDocumentTree()) { result -> if (result == null) { return@registerForActivityResult @@ -175,6 +182,17 @@ class AddonsFragment : Fragment() { } } + private val installGameUpdate = + registerForActivityResult(ActivityResultContracts.OpenMultipleDocuments()) { documents -> + InstallableActions.verifyAndInstallContent( + activity = requireActivity(), + fragmentManager = parentFragmentManager, + addonViewModel = addonViewModel, + documents = documents, + programId = args.game.programId + ) + } + private fun setInsets() = ViewCompat.setOnApplyWindowInsetsListener( binding.root diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/ContentTypeSelectionDialogFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/ContentTypeSelectionDialogFragment.kt index 880c2ff3bf..fb59a3a52c 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/ContentTypeSelectionDialogFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/ContentTypeSelectionDialogFragment.kt @@ -8,18 +8,14 @@ package org.yuzu.yuzu_emu.fragments import android.app.Dialog import android.content.DialogInterface -import android.net.Uri import android.os.Bundle -import androidx.activity.result.contract.ActivityResultContracts import androidx.fragment.app.DialogFragment import androidx.fragment.app.activityViewModels import androidx.preference.PreferenceManager import com.google.android.material.dialog.MaterialAlertDialogBuilder -import org.yuzu.yuzu_emu.NativeLibrary import org.yuzu.yuzu_emu.R import org.yuzu.yuzu_emu.YuzuApplication import org.yuzu.yuzu_emu.model.AddonViewModel -import org.yuzu.yuzu_emu.utils.InstallableActions class ContentTypeSelectionDialogFragment : DialogFragment() { private val addonViewModel: AddonViewModel by activityViewModels() @@ -29,52 +25,6 @@ class ContentTypeSelectionDialogFragment : DialogFragment() { private var selectedItem = 0 - private val installGameUpdateLauncher = - registerForActivityResult(ActivityResultContracts.OpenMultipleDocuments()) { documents -> - if (documents.isEmpty()) { - return@registerForActivityResult - } - - val game = addonViewModel.game - if (game == null) { - installContent(documents) - return@registerForActivityResult - } - - ProgressDialogFragment.newInstance( - requireActivity(), - R.string.verifying_content, - false - ) { _, _ -> - var updatesMatchProgram = true - for (document in documents) { - val valid = NativeLibrary.doesUpdateMatchProgram( - game.programId, - document.toString() - ) - if (!valid) { - updatesMatchProgram = false - break - } - } - - requireActivity().runOnUiThread { - if (updatesMatchProgram) { - installContent(documents) - } else { - MessageDialogFragment.newInstance( - requireActivity(), - titleId = R.string.content_install_notice, - descriptionId = R.string.content_install_notice_description, - positiveAction = { installContent(documents) }, - negativeAction = {} - ).show(parentFragmentManager, MessageDialogFragment.TAG) - } - } - return@newInstance Any() - }.show(parentFragmentManager, ProgressDialogFragment.TAG) - } - override fun onCreateDialog(savedInstanceState: Bundle?): Dialog { val launchOptions = arrayOf(getString(R.string.updates_and_dlc), getString(R.string.mods_and_cheats)) @@ -87,7 +37,10 @@ class ContentTypeSelectionDialogFragment : DialogFragment() { .setTitle(R.string.select_content_type) .setPositiveButton(android.R.string.ok) { _: DialogInterface, _: Int -> when (selectedItem) { - 0 -> installGameUpdateLauncher.launch(arrayOf("*/*")) + 0 -> parentFragmentManager.setFragmentResult( + REQUEST_INSTALL_GAME_UPDATE, + Bundle() + ) else -> { if (!preferences.getBoolean(MOD_NOTICE_SEEN, false)) { preferences.edit().putBoolean(MOD_NOTICE_SEEN, true).apply() @@ -112,17 +65,9 @@ class ContentTypeSelectionDialogFragment : DialogFragment() { companion object { const val TAG = "ContentTypeSelectionDialogFragment" + const val REQUEST_INSTALL_GAME_UPDATE = "RequestInstallGameUpdate" private const val SELECTED_ITEM = "SelectedItem" private const val MOD_NOTICE_SEEN = "ModNoticeSeen" } - - private fun installContent(documents: List) { - InstallableActions.installContent( - activity = requireActivity(), - fragmentManager = parentFragmentManager, - addonViewModel = addonViewModel, - documents = documents - ) - } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriverManagerFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriverManagerFragment.kt index 89a6362dc6..3aa55522e6 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriverManagerFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/DriverManagerFragment.kt @@ -29,7 +29,6 @@ import org.yuzu.yuzu_emu.databinding.FragmentDriverManagerBinding import org.yuzu.yuzu_emu.features.settings.model.Settings import org.yuzu.yuzu_emu.features.settings.model.StringSetting import org.yuzu.yuzu_emu.features.settings.ui.SettingsSubscreen -import org.yuzu.yuzu_emu.model.Driver.Companion.toDriver import org.yuzu.yuzu_emu.model.DriverViewModel import org.yuzu.yuzu_emu.model.HomeViewModel import org.yuzu.yuzu_emu.utils.FileUtil @@ -142,6 +141,17 @@ class DriverManagerFragment : Fragment() { driverViewModel.onCloseDriverManager(args.game) } + override fun onResume() { + super.onResume() + refreshDriverList() + } + + private fun refreshDriverList() { + driverViewModel.reloadDriverData() + (binding.listDrivers.adapter as? DriverAdapter) + ?.replaceList(driverViewModel.driverList.value) + } + private fun setInsets() = ViewCompat.setOnApplyWindowInsetsListener( binding.root @@ -205,19 +215,23 @@ class DriverManagerFragment : Fragment() { val driverData = GpuDriverHelper.getMetadataFromZip(driverFile) val driverInList = - driverViewModel.driverData.firstOrNull { it.second == driverData } + driverViewModel.driverData.firstOrNull { + it.first == driverPath || it.second == driverData + } if (driverInList != null) { return@newInstance getString(R.string.driver_already_installed) } else { driverViewModel.onDriverAdded(Pair(driverPath, driverData)) withContext(Dispatchers.Main) { if (_binding != null) { + refreshDriverList() val adapter = binding.listDrivers.adapter as DriverAdapter - adapter.addItem(driverData.toDriver()) - adapter.selectItem(adapter.currentList.indices.last) + val selectedPosition = adapter.currentList + .indexOfFirst { it.selected } + .let { if (it == -1) 0 else it } driverViewModel.showClearButton(!StringSetting.DRIVER_PATH.global) binding.listDrivers - .smoothScrollToPosition(adapter.currentList.indices.last) + .smoothScrollToPosition(selectedPosition) } } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt index 435fe5fe2c..b67bc6a9cc 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt @@ -50,6 +50,7 @@ import androidx.fragment.app.Fragment import androidx.fragment.app.activityViewModels import androidx.lifecycle.lifecycleScope import androidx.navigation.findNavController +import androidx.navigation.fragment.NavHostFragment import androidx.navigation.fragment.navArgs import androidx.window.layout.FoldingFeature import androidx.window.layout.WindowInfoTracker @@ -135,6 +136,8 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { private var intentGame: Game? = null private var isCustomSettingsIntent = false + private var isStoppingForRomSwap = false + private var deferGameSetupUntilStopCompletes = false private var perfStatsRunnable: Runnable? = null private var socRunnable: Runnable? = null @@ -238,6 +241,14 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } } + if (emulationViewModel.isEmulationStopping.value) { + deferGameSetupUntilStopCompletes = true + if (game == null) { + game = args.game ?: intentGame + } + return + } + finishGameSetup() } @@ -260,6 +271,7 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } game = gameToUse + emulationActivity?.updateSessionGame(gameToUse) } catch (e: Exception) { Log.error("[EmulationFragment] Error during game setup: ${e.message}") Toast.makeText( @@ -334,7 +346,8 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } emulationState = EmulationState(game!!.path) { - return@EmulationState driverViewModel.isInteractionAllowed.value + return@EmulationState driverViewModel.isInteractionAllowed.value && + !isStoppingForRomSwap } } @@ -890,8 +903,12 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } ) - GameIconUtils.loadGameIcon(game!!, binding.loadingImage) - binding.loadingTitle.text = game!!.title + game?.let { + GameIconUtils.loadGameIcon(it, binding.loadingImage) + binding.loadingTitle.text = it.title + } ?: run { + binding.loadingTitle.text = "" + } binding.loadingTitle.isSelected = true binding.loadingText.isSelected = true @@ -959,6 +976,12 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { ViewUtils.showView(binding.loadingIndicator) ViewUtils.hideView(binding.inputContainer) ViewUtils.hideView(binding.showStatsOverlayText) + } else if (deferGameSetupUntilStopCompletes) { + if (!isAdded) { + return@collect + } + deferGameSetupUntilStopCompletes = false + finishGameSetup() } } emulationViewModel.drawerOpen.collect(viewLifecycleOwner) { @@ -995,26 +1018,24 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } driverViewModel.isInteractionAllowed.collect(viewLifecycleOwner) { - if (it && !NativeLibrary.isRunning() && !NativeLibrary.isPaused()) { - startEmulation() + if (it && + !isStoppingForRomSwap && + !NativeLibrary.isRunning() && + !NativeLibrary.isPaused() + ) { + if (!DirectoryInitialization.areDirectoriesReady) { + DirectoryInitialization.start() + } + + updateScreenLayout() + + emulationState.run(emulationActivity!!.isActivityRecreated) } } driverViewModel.onLaunchGame() } - private fun startEmulation(programIndex: Int = 0) { - if (!NativeLibrary.isRunning() && !NativeLibrary.isPaused()) { - if (!DirectoryInitialization.areDirectoriesReady) { - DirectoryInitialization.start() - } - - updateScreenLayout() - - emulationState.run(emulationActivity!!.isActivityRecreated, programIndex) - } - } - override fun onConfigurationChanged(newConfig: Configuration) { super.onConfigurationChanged(newConfig) val b = _binding ?: return @@ -1375,6 +1396,9 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { super.onDestroyView() amiiboLoadJob?.cancel() amiiboLoadJob = null + perfStatsRunnable?.let { perfStatsUpdateHandler.removeCallbacks(it) } + socRunnable?.let { socUpdateHandler.removeCallbacks(it) } + handler.removeCallbacksAndMessages(null) clearPausedFrame() _binding?.surfaceInputOverlay?.touchEventListener = null _binding = null @@ -1382,7 +1406,9 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } override fun onDetach() { - NativeLibrary.clearEmulationActivity() + if (!hasNewerEmulationFragment()) { + NativeLibrary.clearEmulationActivity() + } super.onDetach() } @@ -1840,10 +1866,74 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } override fun surfaceDestroyed(holder: SurfaceHolder) { - emulationState.clearSurface() + if (this::emulationState.isInitialized && !hasNewerEmulationFragment()) { + emulationState.clearSurface() + } emulationStarted = false } + private fun hasNewerEmulationFragment(): Boolean { + val activity = emulationActivity ?: return false + return try { + val navHostFragment = + activity.supportFragmentManager.findFragmentById(R.id.fragment_container) as? NavHostFragment + ?: return false + val currentFragment = navHostFragment.childFragmentManager.fragments + .filterIsInstance() + .firstOrNull() + currentFragment != null && currentFragment !== this + } catch (_: Exception) { + false + } + } + + // xbzk: called from EmulationActivity when a new game is loaded while this fragment is still active, + // to wait for the emulation thread to stop before allowing the ROM swap to proceed + fun notifyWhenEmulationThreadStops(onStopped: () -> Unit) { + if (!this::emulationState.isInitialized) { + onStopped() + return + } + val emuThread = runCatching { emulationState.emulationThread }.getOrNull() + if (emuThread == null || !emuThread.isAlive) { + onStopped() + return + } + Thread({ + runCatching { emuThread.join() } + Handler(Looper.getMainLooper()).post { + onStopped() + } + }, "RomSwapWait").start() + } + + // xbzk: called from EmulationActivity when a new game is loaded while this + // fragment is still active, to stop the current emulation before swapping the ROM + fun stopForRomSwap() { + if (isStoppingForRomSwap) { + return + } + isStoppingForRomSwap = true + clearPausedFrame() + emulationViewModel.setIsEmulationStopping(true) + _binding?.let { + binding.loadingText.setText(R.string.shutting_down) + ViewUtils.showView(binding.loadingIndicator) + ViewUtils.hideView(binding.inputContainer) + ViewUtils.hideView(binding.showStatsOverlayText) + } + if (this::emulationState.isInitialized) { + emulationState.stop() + if (NativeLibrary.isRunning() || NativeLibrary.isPaused()) { + Log.warning("[EmulationFragment] ROM swap stop fallback: forcing native stop request.") + NativeLibrary.stopEmulation() + } + } else { + NativeLibrary.stopEmulation() + } + NativeConfig.reloadGlobalConfig() + } + private fun showOverlayOptions() { val anchor = binding.inGameMenu.findViewById(R.id.menu_overlay_controls) val popup = PopupMenu(requireContext(), anchor) @@ -2134,6 +2224,7 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { state = State.STOPPED } else { Log.warning("[EmulationFragment] Stop called while already stopped.") + NativeLibrary.stopEmulation() } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/GamePropertiesFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/GamePropertiesFragment.kt index 46b75197d5..c3dea79bae 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/GamePropertiesFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/GamePropertiesFragment.kt @@ -36,6 +36,7 @@ import org.yuzu.yuzu_emu.databinding.FragmentGamePropertiesBinding import org.yuzu.yuzu_emu.features.DocumentProvider import org.yuzu.yuzu_emu.features.settings.model.Settings import org.yuzu.yuzu_emu.features.settings.ui.SettingsSubscreen +import org.yuzu.yuzu_emu.model.AddonViewModel import org.yuzu.yuzu_emu.model.DriverViewModel import org.yuzu.yuzu_emu.model.GameProperty import org.yuzu.yuzu_emu.model.GamesViewModel @@ -46,6 +47,7 @@ import org.yuzu.yuzu_emu.model.SubmenuProperty import org.yuzu.yuzu_emu.model.TaskState import org.yuzu.yuzu_emu.utils.DirectoryInitialization import org.yuzu.yuzu_emu.utils.FileUtil +import org.yuzu.yuzu_emu.utils.GameHelper import org.yuzu.yuzu_emu.utils.GameIconUtils import org.yuzu.yuzu_emu.utils.GpuDriverHelper import org.yuzu.yuzu_emu.utils.MemoryUtil @@ -61,6 +63,7 @@ class GamePropertiesFragment : Fragment() { private val homeViewModel: HomeViewModel by activityViewModels() private val gamesViewModel: GamesViewModel by activityViewModels() + private val addonViewModel: AddonViewModel by activityViewModels() private val driverViewModel: DriverViewModel by activityViewModels() private val args by navArgs() @@ -118,6 +121,20 @@ class GamePropertiesFragment : Fragment() { .show(childFragmentManager, LaunchGameDialogFragment.TAG) } + if (GameHelper.cachedGameList.isEmpty()) { + binding.buttonStart.isEnabled = false + viewLifecycleOwner.lifecycleScope.launch { + withContext(Dispatchers.IO) { + GameHelper.restoreContentForGame(args.game) + } + if (_binding == null) { + return@launch + } + addonViewModel.onAddonsViewStarted(args.game) + binding.buttonStart.isEnabled = true + } + } + reloadList() homeViewModel.openImportSaves.collect( @@ -133,8 +150,11 @@ class GamePropertiesFragment : Fragment() { } override fun onDestroy() { + val isChangingConfigurations = activity?.isChangingConfigurations == true super.onDestroy() - gamesViewModel.reloadGames(true) + if (!isChangingConfigurations) { + gamesViewModel.reloadGames(true) + } } private fun getPlayTime() { diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/InstallableFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/InstallableFragment.kt index 10862c37b4..6510c069e3 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/InstallableFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/InstallableFragment.kt @@ -227,66 +227,13 @@ class InstallableFragment : Fragment() { private val installGameUpdateLauncher = registerForActivityResult(ActivityResultContracts.OpenMultipleDocuments()) { documents -> - if (documents.isEmpty()) { - return@registerForActivityResult - } - - if (addonViewModel.game == null) { - InstallableActions.installContent( - activity = requireActivity(), - fragmentManager = parentFragmentManager, - addonViewModel = addonViewModel, - documents = documents - ) - return@registerForActivityResult - } - - ProgressDialogFragment.newInstance( - requireActivity(), - R.string.verifying_content, - false - ) { _, _ -> - var updatesMatchProgram = true - for (document in documents) { - val valid = NativeLibrary.doesUpdateMatchProgram( - addonViewModel.game!!.programId, - document.toString() - ) - if (!valid) { - updatesMatchProgram = false - break - } - } - - if (updatesMatchProgram) { - requireActivity().runOnUiThread { - InstallableActions.installContent( - activity = requireActivity(), - fragmentManager = parentFragmentManager, - addonViewModel = addonViewModel, - documents = documents - ) - } - } else { - requireActivity().runOnUiThread { - MessageDialogFragment.newInstance( - requireActivity(), - titleId = R.string.content_install_notice, - descriptionId = R.string.content_install_notice_description, - positiveAction = { - InstallableActions.installContent( - activity = requireActivity(), - fragmentManager = parentFragmentManager, - addonViewModel = addonViewModel, - documents = documents - ) - }, - negativeAction = {} - ).show(parentFragmentManager, MessageDialogFragment.TAG) - } - } - return@newInstance Any() - }.show(parentFragmentManager, ProgressDialogFragment.TAG) + InstallableActions.verifyAndInstallContent( + activity = requireActivity(), + fragmentManager = parentFragmentManager, + addonViewModel = addonViewModel, + documents = documents, + programId = addonViewModel.game?.programId + ) } private val importUserDataLauncher = diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/ProfileManagerFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/ProfileManagerFragment.kt index 2786906f6b..bd37c4c9c7 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/ProfileManagerFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/ProfileManagerFragment.kt @@ -9,6 +9,7 @@ import android.view.View import android.view.ViewGroup import androidx.core.view.ViewCompat import androidx.core.view.WindowInsetsCompat +import androidx.core.view.updatePadding import androidx.fragment.app.Fragment import androidx.fragment.app.activityViewModels import androidx.navigation.fragment.findNavController @@ -22,6 +23,7 @@ import org.yuzu.yuzu_emu.databinding.FragmentProfileManagerBinding import org.yuzu.yuzu_emu.model.HomeViewModel import org.yuzu.yuzu_emu.model.UserProfile import org.yuzu.yuzu_emu.utils.NativeConfig +import org.yuzu.yuzu_emu.utils.ViewUtils.updateMargins class ProfileManagerFragment : Fragment() { private var _binding: FragmentProfileManagerBinding? = null @@ -172,11 +174,19 @@ class ProfileManagerFragment : Fragment() { val leftInsets = barInsets.left + cutoutInsets.left val rightInsets = barInsets.right + cutoutInsets.right - val fabLayoutParams = binding.buttonAddUser.layoutParams as ViewGroup.MarginLayoutParams - fabLayoutParams.leftMargin = leftInsets + 24 - fabLayoutParams.rightMargin = rightInsets + 24 - fabLayoutParams.bottomMargin = barInsets.bottom + 24 - binding.buttonAddUser.layoutParams = fabLayoutParams + binding.toolbarProfiles.updateMargins(left = leftInsets, right = rightInsets) + binding.listProfiles.updateMargins(left = leftInsets, right = rightInsets) + binding.listProfiles.updatePadding( + bottom = barInsets.bottom + + resources.getDimensionPixelSize(R.dimen.spacing_bottom_list_fab) + ) + + val fabSpacing = resources.getDimensionPixelSize(R.dimen.spacing_fab) + binding.buttonAddUser.updateMargins( + left = leftInsets + fabSpacing, + right = rightInsets + fabSpacing, + bottom = barInsets.bottom + fabSpacing + ) windowInsets } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/DriverViewModel.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/DriverViewModel.kt index cd5792b33a..3904f83279 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/DriverViewModel.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/DriverViewModel.kt @@ -71,6 +71,8 @@ class DriverViewModel : ViewModel() { fun reloadDriverData() { _areDriversLoading.value = true driverData = GpuDriverHelper.getDrivers() + .filterNot { driversToDelete.contains(it.first) } + .toMutableList() updateDriverList() _areDriversLoading.value = false } @@ -152,41 +154,55 @@ class DriverViewModel : ViewModel() { } fun onDriverRemoved(removedPosition: Int, selectedPosition: Int) { - driversToDelete.add(driverData[removedPosition - 1].first) - driverData.removeAt(removedPosition - 1) - onDriverSelected(selectedPosition) + val driverIndex = removedPosition - 1 + if (driverIndex !in driverData.indices) { + updateDriverList() + return + } + + driversToDelete.add(driverData[driverIndex].first) + driverData.removeAt(driverIndex) + val safeSelectedPosition = selectedPosition.coerceIn(0, driverData.size) + onDriverSelected(safeSelectedPosition) } fun onDriverAdded(driver: Pair) { if (driversToDelete.contains(driver.first)) { driversToDelete.remove(driver.first) } + + val existingDriverIndex = driverData.indexOfFirst { + it.first == driver.first || it.second == driver.second + } + if (existingDriverIndex != -1) { + onDriverSelected(existingDriverIndex + 1) + return + } driverData.add(driver) onDriverSelected(driverData.size) } fun onCloseDriverManager(game: Game?) { _isDeletingDrivers.value = true - updateDriverNameForGame(game) - if (game == null) { - NativeConfig.saveGlobalConfig() - } else { - NativeConfig.savePerGameConfig() - NativeConfig.unloadPerGameConfig() - NativeConfig.reloadGlobalConfig() - } - - viewModelScope.launch { - withContext(Dispatchers.IO) { - driversToDelete.forEach { - val driver = File(it) - if (driver.exists()) { - driver.delete() - } - } - driversToDelete.clear() - _isDeletingDrivers.value = false + try { + updateDriverNameForGame(game) + if (game == null) { + NativeConfig.saveGlobalConfig() + } else { + NativeConfig.savePerGameConfig() + NativeConfig.unloadPerGameConfig() + NativeConfig.reloadGlobalConfig() } + + driversToDelete.forEach { + val driver = File(it) + if (driver.exists()) { + driver.delete() + } + } + driversToDelete.clear() + } finally { + _isDeletingDrivers.value = false } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/GamesViewModel.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/GamesViewModel.kt index 39ff038034..1a63a3ad82 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/GamesViewModel.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/GamesViewModel.kt @@ -100,42 +100,45 @@ class GamesViewModel : ViewModel() { viewModelScope.launch { withContext(Dispatchers.IO) { - if (firstStartup) { - // Retrieve list of cached games - val storedGames = - PreferenceManager.getDefaultSharedPreferences(YuzuApplication.appContext) - .getStringSet(GameHelper.KEY_GAMES, emptySet()) - if (storedGames!!.isNotEmpty()) { - val deserializedGames = mutableSetOf() - storedGames.forEach { - val game: Game - try { - game = Json.decodeFromString(it) - } catch (e: Exception) { - // We don't care about any errors related to parsing the game cache - return@forEach - } + try { + if (firstStartup) { + // Retrieve list of cached games + val storedGames = + PreferenceManager.getDefaultSharedPreferences(YuzuApplication.appContext) + .getStringSet(GameHelper.KEY_GAMES, emptySet()) + if (storedGames!!.isNotEmpty()) { + val deserializedGames = mutableSetOf() + storedGames.forEach { + val game: Game + try { + game = Json.decodeFromString(it) + } catch (e: Exception) { + // We don't care about any errors related to parsing the game cache + return@forEach + } - val gameExists = - DocumentFile.fromSingleUri( - YuzuApplication.appContext, - Uri.parse(game.path) - )?.exists() - if (gameExists == true) { - deserializedGames.add(game) + val gameExists = + DocumentFile.fromSingleUri( + YuzuApplication.appContext, + Uri.parse(game.path) + )?.exists() + if (gameExists == true) { + deserializedGames.add(game) + } } + setGames(deserializedGames.toList()) } - setGames(deserializedGames.toList()) } - } - setGames(GameHelper.getGames()) - reloading.set(false) - _isReloading.value = false - _shouldScrollAfterReload.value = true + setGames(GameHelper.getGames()) + _shouldScrollAfterReload.value = true - if (directoriesChanged) { - setShouldSwapData(true) + if (directoriesChanged) { + setShouldSwapData(true) + } + } finally { + reloading.set(false) + _isReloading.value = false } } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt index f0806df786..584322df50 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt @@ -32,7 +32,6 @@ import org.yuzu.yuzu_emu.databinding.ActivityMainBinding import org.yuzu.yuzu_emu.dialogs.NetPlayDialog import org.yuzu.yuzu_emu.features.settings.model.Settings import org.yuzu.yuzu_emu.fragments.AddGameFolderDialogFragment -import org.yuzu.yuzu_emu.fragments.ProgressDialogFragment import org.yuzu.yuzu_emu.fragments.MessageDialogFragment import org.yuzu.yuzu_emu.model.AddonViewModel import org.yuzu.yuzu_emu.model.DriverViewModel @@ -87,8 +86,6 @@ class MainActivity : AppCompatActivity(), ThemeProvider { binding = ActivityMainBinding.inflate(layoutInflater) - // Since Android 15, google automatically forces "games" to be 60 hrz - // This ensures the display's max refresh rate is actually used display?.let { val supportedModes = it.supportedModes val maxRefreshRate = supportedModes.maxByOrNull { mode -> mode.refreshRate } @@ -170,6 +167,7 @@ class MainActivity : AppCompatActivity(), ThemeProvider { checkForUpdates() } setInsets() + applyFullscreenPreference() } private fun checkForUpdates() { @@ -346,6 +344,14 @@ class MainActivity : AppCompatActivity(), ThemeProvider { override fun onResume() { ThemeHelper.setCorrectTheme(this) super.onResume() + applyFullscreenPreference() + } + + override fun onWindowFocusChanged(hasFocus: Boolean) { + super.onWindowFocusChanged(hasFocus) + if (hasFocus) { + applyFullscreenPreference() + } } private fun setInsets() = ViewCompat.setOnApplyWindowInsetsListener( @@ -365,6 +371,10 @@ class MainActivity : AppCompatActivity(), ThemeProvider { windowInsets } + private fun applyFullscreenPreference() { + FullscreenHelper.applyToActivity(this) + } + override fun setTheme(resId: Int) { super.setTheme(resId) themeId = resId @@ -479,49 +489,6 @@ class MainActivity : AppCompatActivity(), ThemeProvider { ) } - val installGameUpdate = registerForActivityResult( - ActivityResultContracts.OpenMultipleDocuments() - ) { documents: List -> - if (documents.isEmpty()) { - return@registerForActivityResult - } - - if (addonViewModel.game == null) { - installContent(documents) - return@registerForActivityResult - } - - ProgressDialogFragment.newInstance( - this@MainActivity, - R.string.verifying_content, - false - ) { _, _ -> - var updatesMatchProgram = true - for (document in documents) { - val valid = NativeLibrary.doesUpdateMatchProgram( - addonViewModel.game!!.programId, - document.toString() - ) - if (!valid) { - updatesMatchProgram = false - break - } - } - - if (updatesMatchProgram) { - homeViewModel.setContentToInstall(documents) - } else { - MessageDialogFragment.newInstance( - this@MainActivity, - titleId = R.string.content_install_notice, - descriptionId = R.string.content_install_notice_description, - positiveAction = { homeViewModel.setContentToInstall(documents) }, - negativeAction = {} - ) - } - }.show(supportFragmentManager, ProgressDialogFragment.TAG) - } - private fun installContent(documents: List) { InstallableActions.installContent( activity = this, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DirectoryInitialization.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DirectoryInitialization.kt index f47c60491b..f961c5e984 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DirectoryInitialization.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DirectoryInitialization.kt @@ -23,8 +23,8 @@ object DirectoryInitialization { fun start() { if (!areDirectoriesReady) { initializeInternalStorage() - NativeLibrary.initializeSystem(false) NativeConfig.initializeGlobalConfig() + NativeLibrary.initializeSystem(false) NativeLibrary.reloadProfiles() migrateSettings() areDirectoriesReady = true diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/FullscreenHelper.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/FullscreenHelper.kt new file mode 100644 index 0000000000..62c83e1806 --- /dev/null +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/FullscreenHelper.kt @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +package org.yuzu.yuzu_emu.utils + +import android.app.Activity +import android.content.Context +import android.view.Window +import androidx.core.content.edit +import androidx.core.view.ViewCompat +import androidx.core.view.WindowInsetsCompat +import androidx.core.view.WindowInsetsControllerCompat +import androidx.preference.PreferenceManager +import org.yuzu.yuzu_emu.features.settings.model.Settings + +object FullscreenHelper { + fun isFullscreenEnabled(context: Context): Boolean { + return PreferenceManager.getDefaultSharedPreferences(context).getBoolean( + Settings.PREF_APP_FULLSCREEN, + Settings.APP_FULLSCREEN_DEFAULT + ) + } + + fun setFullscreenEnabled(context: Context, enabled: Boolean) { + PreferenceManager.getDefaultSharedPreferences(context).edit { + putBoolean(Settings.PREF_APP_FULLSCREEN, enabled) + } + } + + fun shouldHideSystemBars(activity: Activity): Boolean { + val rootInsets = ViewCompat.getRootWindowInsets(activity.window.decorView) + val barsCurrentlyHidden = + rootInsets?.isVisible(WindowInsetsCompat.Type.systemBars())?.not() ?: false + return isFullscreenEnabled(activity) || barsCurrentlyHidden + } + + fun applyToWindow(window: Window, hideSystemBars: Boolean) { + val controller = WindowInsetsControllerCompat(window, window.decorView) + + if (hideSystemBars) { + controller.hide(WindowInsetsCompat.Type.systemBars()) + controller.systemBarsBehavior = + WindowInsetsControllerCompat.BEHAVIOR_SHOW_TRANSIENT_BARS_BY_SWIPE + } else { + controller.show(WindowInsetsCompat.Type.systemBars()) + } + } + + fun applyToActivity(activity: Activity) { + applyToWindow(activity.window, isFullscreenEnabled(activity)) + } +} diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt index 4a3cf61daa..64e035afbe 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt @@ -8,9 +8,11 @@ package org.yuzu.yuzu_emu.utils import android.content.SharedPreferences import android.net.Uri +import android.provider.DocumentsContract import androidx.preference.PreferenceManager import kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json +import java.io.File import org.yuzu.yuzu_emu.NativeLibrary import org.yuzu.yuzu_emu.YuzuApplication import org.yuzu.yuzu_emu.model.Game @@ -49,29 +51,8 @@ object GameHelper { // Remove previous filesystem provider information so we can get up to date version info NativeLibrary.clearFilesystemProvider() - // Scan External Content directories and register all NSP/XCI files - val externalContentDirs = NativeConfig.getExternalContentDirs() - val uniqueExternalContentDirs = linkedSetOf() - externalContentDirs.forEach { externalDir -> - if (externalDir.isNotEmpty()) { - uniqueExternalContentDirs.add(externalDir) - } - } - val mountedContainerUris = mutableSetOf() - for (externalDir in uniqueExternalContentDirs) { - if (externalDir.isNotEmpty()) { - val externalDirUri = externalDir.toUri() - if (FileUtil.isTreeUriValid(externalDirUri)) { - scanContentContainersRecursive(FileUtil.listFiles(externalDirUri), 3) { - val containerUri = it.uri.toString() - if (mountedContainerUris.add(containerUri)) { - NativeLibrary.addFileToFilesystemProvider(containerUri) - } - } - } - } - } + mountExternalContentDirectories(mountedContainerUris) val badDirs = mutableListOf() gameDirs.forEachIndexed { index: Int, gameDir: GameDir -> @@ -115,6 +96,15 @@ object GameHelper { return games.toList() } + fun restoreContentForGame(game: Game) { + NativeLibrary.reloadKeys() + + val mountedContainerUris = mutableSetOf() + mountExternalContentDirectories(mountedContainerUris) + mountGameFolderContent(Uri.parse(game.path), mountedContainerUris) + NativeLibrary.addFileToFilesystemProvider(game.path) + } + // File extensions considered as external content, buuut should // be done better imo. private val externalContentExtensions = setOf("nsp", "xci") @@ -181,6 +171,71 @@ object GameHelper { } } + private fun mountExternalContentDirectories(mountedContainerUris: MutableSet) { + val uniqueExternalContentDirs = linkedSetOf() + NativeConfig.getExternalContentDirs().forEach { externalDir -> + if (externalDir.isNotEmpty()) { + uniqueExternalContentDirs.add(externalDir) + } + } + + for (externalDir in uniqueExternalContentDirs) { + val externalDirUri = externalDir.toUri() + if (FileUtil.isTreeUriValid(externalDirUri)) { + scanContentContainersRecursive(FileUtil.listFiles(externalDirUri), 3) { + val containerUri = it.uri.toString() + if (mountedContainerUris.add(containerUri)) { + NativeLibrary.addFileToFilesystemProvider(containerUri) + } + } + } + } + } + + private fun mountGameFolderContent(gameUri: Uri, mountedContainerUris: MutableSet) { + if (gameUri.scheme == "content") { + val parentUri = getParentDocumentUri(gameUri) ?: return + scanContentContainersRecursive(FileUtil.listFiles(parentUri), 1) { + val containerUri = it.uri.toString() + if (mountedContainerUris.add(containerUri)) { + NativeLibrary.addGameFolderFileToFilesystemProvider(containerUri) + } + } + return + } + + val gameFile = File(gameUri.path ?: gameUri.toString()) + val parentDir = gameFile.parentFile ?: return + parentDir.listFiles()?.forEach { sibling -> + if (!sibling.isFile) { + return@forEach + } + + val extension = sibling.extension.lowercase() + if (externalContentExtensions.contains(extension)) { + val containerUri = Uri.fromFile(sibling).toString() + if (mountedContainerUris.add(containerUri)) { + NativeLibrary.addGameFolderFileToFilesystemProvider(containerUri) + } + } + } + } + + private fun getParentDocumentUri(uri: Uri): Uri? { + return try { + val documentId = DocumentsContract.getDocumentId(uri) + val separatorIndex = documentId.lastIndexOf('/') + if (separatorIndex == -1) { + null + } else { + val parentDocumentId = documentId.substring(0, separatorIndex) + DocumentsContract.buildDocumentUriUsingTree(uri, parentDocumentId) + } + } catch (_: Exception) { + null + } + } + fun getGame( uri: Uri, addedToLibrary: Boolean, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/InstallableActions.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/InstallableActions.kt index d385e2a095..882bae965b 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/InstallableActions.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/InstallableActions.kt @@ -26,6 +26,78 @@ import java.util.zip.ZipEntry import java.util.zip.ZipInputStream object InstallableActions { + private fun verifyGameContentAndInstall( + activity: FragmentActivity, + fragmentManager: FragmentManager, + documents: List, + programId: String?, + onInstallConfirmed: () -> Unit + ) { + if (documents.isEmpty()) { + return + } + + if (programId == null) { + onInstallConfirmed() + return + } + + ProgressDialogFragment.newInstance( + activity, + R.string.verifying_content, + false + ) { _, _ -> + var updatesMatchProgram = true + for (document in documents) { + val valid = NativeLibrary.doesUpdateMatchProgram( + programId, + document.toString() + ) + if (!valid) { + updatesMatchProgram = false + break + } + } + + activity.runOnUiThread { + if (updatesMatchProgram) { + onInstallConfirmed() + } else { + MessageDialogFragment.newInstance( + activity, + titleId = R.string.content_install_notice, + descriptionId = R.string.content_install_notice_description, + positiveAction = onInstallConfirmed, + negativeAction = {} + ).show(fragmentManager, MessageDialogFragment.TAG) + } + } + return@newInstance Any() + }.show(fragmentManager, ProgressDialogFragment.TAG) + } + + fun verifyAndInstallContent( + activity: FragmentActivity, + fragmentManager: FragmentManager, + addonViewModel: AddonViewModel, + documents: List, + programId: String? + ) { + verifyGameContentAndInstall( + activity = activity, + fragmentManager = fragmentManager, + documents = documents, + programId = programId + ) { + installContent( + activity = activity, + fragmentManager = fragmentManager, + addonViewModel = addonViewModel, + documents = documents + ) + } + } + fun processKey( activity: FragmentActivity, fragmentManager: FragmentManager, diff --git a/src/android/app/src/main/jni/android_settings.h b/src/android/app/src/main/jni/android_settings.h index 606ce2ce84..8628021f75 100644 --- a/src/android/app/src/main/jni/android_settings.h +++ b/src/android/app/src/main/jni/android_settings.h @@ -56,7 +56,7 @@ namespace AndroidSettings { Settings::Setting theme{linkage, 0, "theme", Settings::Category::Android}; Settings::Setting theme_mode{linkage, -1, "theme_mode", Settings::Category::Android}; - Settings::Setting static_theme_color{linkage, 5, "static_theme_color", Settings::Category::Android}; + Settings::Setting static_theme_color{linkage, 0, "static_theme_color", Settings::Category::Android}; Settings::Setting black_backgrounds{linkage, false, "black_backgrounds", Settings::Category::Android}; Settings::Setting app_language{linkage, 0, "app_language", Settings::Category::Android}; diff --git a/src/android/app/src/main/jni/emu_window/emu_window.cpp b/src/android/app/src/main/jni/emu_window/emu_window.cpp index 4e6fd560f4..6886dac172 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window.cpp +++ b/src/android/app/src/main/jni/emu_window/emu_window.cpp @@ -6,8 +6,15 @@ #include +#include +#include +#include +#include +#include + #include "common/android/id_cache.h" #include "common/logging.h" +#include "common/settings.h" #include "input_common/drivers/android.h" #include "input_common/drivers/touch_screen.h" #include "input_common/drivers/virtual_amiibo.h" @@ -22,6 +29,12 @@ void EmuWindow_Android::OnSurfaceChanged(ANativeWindow* surface) { m_window_width = 0; m_window_height = 0; window_info.render_surface = nullptr; + m_last_frame_rate_hint = -1.0f; + m_pending_frame_rate_hint = -1.0f; + m_pending_frame_rate_hint_votes = 0; + m_smoothed_present_rate = 0.0f; + m_last_frame_display_time = {}; + m_pending_frame_rate_since = {}; return; } @@ -32,6 +45,7 @@ void EmuWindow_Android::OnSurfaceChanged(ANativeWindow* surface) { UpdateCurrentFramebufferLayout(m_window_width, m_window_height); window_info.render_surface = reinterpret_cast(surface); + UpdateFrameRateHint(); } void EmuWindow_Android::OnTouchPressed(int id, float x, float y) { @@ -51,6 +65,9 @@ void EmuWindow_Android::OnTouchReleased(int id) { } void EmuWindow_Android::OnFrameDisplayed() { + UpdateObservedFrameRate(); + UpdateFrameRateHint(); + if (!m_first_frame) { Common::Android::RunJNIOnFiber( [&](JNIEnv* env) { EmulationSession::GetInstance().OnEmulationStarted(); }); @@ -58,6 +75,175 @@ void EmuWindow_Android::OnFrameDisplayed() { } } +void EmuWindow_Android::UpdateObservedFrameRate() { + const auto now = Clock::now(); + if (m_last_frame_display_time.time_since_epoch().count() != 0) { + const auto frame_time = std::chrono::duration(now - m_last_frame_display_time); + const float seconds = frame_time.count(); + if (seconds > 0.0f) { + const float instantaneous_rate = 1.0f / seconds; + if (std::isfinite(instantaneous_rate) && instantaneous_rate >= 1.0f && + instantaneous_rate <= 240.0f) { + constexpr float SmoothingFactor = 0.15f; + if (m_smoothed_present_rate <= 0.0f) { + m_smoothed_present_rate = instantaneous_rate; + } else { + m_smoothed_present_rate += + (instantaneous_rate - m_smoothed_present_rate) * SmoothingFactor; + } + } + } + } + m_last_frame_display_time = now; +} + +float EmuWindow_Android::QuantizeFrameRateHint(float frame_rate) { + if (!std::isfinite(frame_rate) || frame_rate <= 0.0f) { + return 0.0f; + } + + frame_rate = std::clamp(frame_rate, 1.0f, 240.0f); + + constexpr float Step = 0.5f; + return std::round(frame_rate / Step) * Step; +} + +float EmuWindow_Android::GetFrameTimeVerifiedHint() const { + if (!EmulationSession::GetInstance().IsRunning()) { + return 0.0f; + } + + const double frame_time_scale = + EmulationSession::GetInstance().System().GetPerfStats().GetLastFrameTimeScale(); + if (!std::isfinite(frame_time_scale) || frame_time_scale <= 0.0) { + return 0.0f; + } + + const float verified_rate = + std::clamp(60.0f / static_cast(frame_time_scale), 0.0f, 240.0f); + return QuantizeFrameRateHint(verified_rate); +} + +float EmuWindow_Android::GetFrameRateHint() const { + const float observed_rate = std::clamp(m_smoothed_present_rate, 0.0f, 240.0f); + const float frame_time_verified_hint = GetFrameTimeVerifiedHint(); + + if (m_last_frame_rate_hint > 0.0f && observed_rate > 0.0f) { + const float tolerance = std::max(m_last_frame_rate_hint * 0.12f, 4.0f); + if (std::fabs(observed_rate - m_last_frame_rate_hint) <= tolerance) { + return m_last_frame_rate_hint; + } + } + + const float observed_hint = QuantizeFrameRateHint(observed_rate); + if (observed_hint > 0.0f) { + if (frame_time_verified_hint > 0.0f) { + const float tolerance = std::max(observed_hint * 0.20f, 3.0f); + if (std::fabs(observed_hint - frame_time_verified_hint) <= tolerance) { + return QuantizeFrameRateHint((observed_hint + frame_time_verified_hint) * 0.5f); + } + } + return observed_hint; + } + + if (frame_time_verified_hint > 0.0f) { + return frame_time_verified_hint; + } + + constexpr float NominalFrameRate = 60.0f; + if (!Settings::values.use_speed_limit.GetValue()) { + return NominalFrameRate; + } + + const u16 speed_limit = Settings::SpeedLimit(); + if (speed_limit == 0) { + return 0.0f; + } + + const float speed_limited_rate = + NominalFrameRate * (static_cast(std::min(speed_limit, 100)) / 100.0f); + return QuantizeFrameRateHint(speed_limited_rate); +} + +void EmuWindow_Android::UpdateFrameRateHint() { + auto* const surface = reinterpret_cast(window_info.render_surface); + if (!surface) { + return; + } + + const auto now = Clock::now(); + const float frame_rate_hint = GetFrameRateHint(); + if (std::fabs(frame_rate_hint - m_last_frame_rate_hint) < 0.01f) { + m_pending_frame_rate_hint = frame_rate_hint; + m_pending_frame_rate_hint_votes = 0; + m_pending_frame_rate_since = {}; + return; + } + + if (frame_rate_hint == 0.0f) { + m_pending_frame_rate_hint = frame_rate_hint; + m_pending_frame_rate_hint_votes = 0; + m_pending_frame_rate_since = now; + } else if (m_last_frame_rate_hint >= 0.0f) { + if (std::fabs(frame_rate_hint - m_pending_frame_rate_hint) >= 0.01f) { + m_pending_frame_rate_hint = frame_rate_hint; + m_pending_frame_rate_hint_votes = 1; + m_pending_frame_rate_since = now; + return; + } + + ++m_pending_frame_rate_hint_votes; + if (m_pending_frame_rate_since.time_since_epoch().count() == 0) { + m_pending_frame_rate_since = now; + } + + const auto stable_for = now - m_pending_frame_rate_since; + const float reference_rate = std::max(frame_rate_hint, 1.0f); + const auto stable_duration = std::chrono::duration_cast( + std::chrono::duration(std::clamp(3.0f / reference_rate, 0.15f, 0.40f))); + constexpr std::uint32_t MinStableVotes = 3; + + if (m_pending_frame_rate_hint_votes < MinStableVotes || stable_for < stable_duration) { + return; + } + } else { + m_pending_frame_rate_since = now; + } + + using SetFrameRateWithChangeStrategyFn = + int32_t (*)(ANativeWindow*, float, int8_t, int8_t); + using SetFrameRateFn = int32_t (*)(ANativeWindow*, float, int8_t); + static const auto set_frame_rate_with_change_strategy = + reinterpret_cast( + dlsym(RTLD_DEFAULT, "ANativeWindow_setFrameRateWithChangeStrategy")); + static const auto set_frame_rate = reinterpret_cast( + dlsym(RTLD_DEFAULT, "ANativeWindow_setFrameRate")); + + constexpr int8_t FrameRateCompatibilityDefault = 0; + constexpr int8_t ChangeFrameRateOnlyIfSeamless = 0; + + int32_t result = -1; + if (set_frame_rate_with_change_strategy) { + result = set_frame_rate_with_change_strategy(surface, frame_rate_hint, + FrameRateCompatibilityDefault, + ChangeFrameRateOnlyIfSeamless); + } else if (set_frame_rate) { + result = set_frame_rate(surface, frame_rate_hint, FrameRateCompatibilityDefault); + } else { + return; + } + + if (result != 0) { + LOG_DEBUG(Frontend, "Failed to update Android surface frame rate hint: {}", result); + return; + } + + m_last_frame_rate_hint = frame_rate_hint; + m_pending_frame_rate_hint = frame_rate_hint; + m_pending_frame_rate_hint_votes = 0; + m_pending_frame_rate_since = {}; +} + EmuWindow_Android::EmuWindow_Android(ANativeWindow* surface, std::shared_ptr driver_library) : m_driver_library{driver_library} { diff --git a/src/android/app/src/main/jni/emu_window/emu_window.h b/src/android/app/src/main/jni/emu_window/emu_window.h index d7b5fc6dac..b73e8b9b4d 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window.h +++ b/src/android/app/src/main/jni/emu_window/emu_window.h @@ -1,8 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once +#include +#include #include #include @@ -50,10 +55,24 @@ public: }; private: + using Clock = std::chrono::steady_clock; + + void UpdateFrameRateHint(); + void UpdateObservedFrameRate(); + [[nodiscard]] float GetFrameRateHint() const; + [[nodiscard]] float GetFrameTimeVerifiedHint() const; + [[nodiscard]] static float QuantizeFrameRateHint(float frame_rate); + float m_window_width{}; float m_window_height{}; std::shared_ptr m_driver_library; bool m_first_frame = false; + float m_last_frame_rate_hint = -1.0f; + float m_pending_frame_rate_hint = -1.0f; + float m_smoothed_present_rate = 0.0f; + Clock::time_point m_last_frame_display_time{}; + Clock::time_point m_pending_frame_rate_since{}; + std::uint32_t m_pending_frame_rate_hint_votes = 0; }; diff --git a/src/android/app/src/main/res/drawable/ic_launcher_foreground.png b/src/android/app/src/main/res/drawable/ic_launcher_foreground.png index 1ccbbd4a5d..8b970cd4cc 100644 Binary files a/src/android/app/src/main/res/drawable/ic_launcher_foreground.png and b/src/android/app/src/main/res/drawable/ic_launcher_foreground.png differ diff --git a/src/android/app/src/main/res/drawable/ic_yuzu.png b/src/android/app/src/main/res/drawable/ic_yuzu.png index c03a370305..7e2461ba24 100644 Binary files a/src/android/app/src/main/res/drawable/ic_yuzu.png and b/src/android/app/src/main/res/drawable/ic_yuzu.png differ diff --git a/src/android/app/src/main/res/drawable/ic_yuzu_splash.png b/src/android/app/src/main/res/drawable/ic_yuzu_splash.png index 2500b856b5..c9404d9937 100644 Binary files a/src/android/app/src/main/res/drawable/ic_yuzu_splash.png and b/src/android/app/src/main/res/drawable/ic_yuzu_splash.png differ diff --git a/src/android/app/src/main/res/layout/fragment_profile_manager.xml b/src/android/app/src/main/res/layout/fragment_profile_manager.xml index e70a3f3da5..67828ca69e 100644 --- a/src/android/app/src/main/res/layout/fragment_profile_manager.xml +++ b/src/android/app/src/main/res/layout/fragment_profile_manager.xml @@ -5,21 +5,22 @@ xmlns:tools="http://schemas.android.com/tools" android:layout_width="match_parent" android:layout_height="match_parent" - android:background="?attr/colorSurface" - android:fitsSystemWindows="true"> + android:background="?attr/colorSurface"> + android:fitsSystemWindows="true" + android:touchscreenBlocksFocus="false"> diff --git a/src/android/app/src/main/res/mipmap-hdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-hdpi/ic_launcher.png index f83b149c8e..74c6677dd9 100644 Binary files a/src/android/app/src/main/res/mipmap-hdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-hdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/mipmap-mdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-mdpi/ic_launcher.png index a790c42402..31a01461b4 100644 Binary files a/src/android/app/src/main/res/mipmap-mdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-mdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png index 2719752e3c..3f0023f573 100644 Binary files a/src/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png index c9df40719b..6e28b3d598 100644 Binary files a/src/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png index 919453824f..39f583b630 100644 Binary files a/src/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/values-ar/strings.xml b/src/android/app/src/main/res/values-ar/strings.xml index 2a3d430716..577b88396b 100644 --- a/src/android/app/src/main/res/values-ar/strings.xml +++ b/src/android/app/src/main/res/values-ar/strings.xml @@ -473,8 +473,6 @@ يتحكم في وضع محاكاة وحدة معالجة الرسومات. تعمل معظم الألعاب بشكل جيد مع وضعي سريع أو متوازن، لكن الوضع الدقيق لا يزال مطلوبًا لبعض الألعاب. تميل الجسيمات إلى العرض بشكل صحيح فقط عند استخدام الوضع الدقيق. دقة DMA يتحكم في دقة DMA. يمكن أن تؤدي الدقة الآمنة إلى حل المشكلات في بعض الألعاب، ولكنها قد تؤثر أيضًا على الأداء في بعض الحالات. إذا لم تكن متأكدًا، فاترك هذا الخيار على الإعداد الافتراضي. - وضع توقيت الإطارات - يتحكم في كيفية إدارة المحاكي لسرعة الإطارات لتقليل التقطع وجعل معدل الإطارات أكثر سلاسة واتساقًا. تصفية متباينة الخواص يحسن جودة الأنسجة عند عرضها بزوايا مائلة وضع استخدام ذاكرة VRAM @@ -999,13 +997,6 @@ غير آمن آمن - - تلقائي - 30 إطارًا في الثانية - 60 إطارًا في الثانية - 90 إطارًا في الثانية - 120 إطارًا في الثانية - غير مضغوط diff --git a/src/android/app/src/main/res/values-cs/strings.xml b/src/android/app/src/main/res/values-cs/strings.xml index 979f1dd707..f42e743173 100644 --- a/src/android/app/src/main/res/values-cs/strings.xml +++ b/src/android/app/src/main/res/values-cs/strings.xml @@ -452,8 +452,6 @@ Určuje režim emulovaného GPU. Většina her běží bez problémů v rychlém, nebo vyváženém režimu, ale některé stále vyžadují přesný režim. Částicové efekty se většinou zobrazují korektně pouze v přesném režimu. Přesnost DMA Ovládá přesnost DMA. Bezpečná přesnost může vyřešit problémy v některých hrách, ale v některých případech může také ovlivnit výkon. Pokud si nejste jisti, použijte výchozí nastavení. - Režim Framepacingu - Řídí způsob jakým emulátor spravuje časování snímku aby snížil trhání a zlepšení plynulost a stabilitu snímkové frekvence. Anizotropní filtrování Zlepšuje kvalitu textur při pohledu pod úhlem Režim využití VRAM diff --git a/src/android/app/src/main/res/values-es/strings.xml b/src/android/app/src/main/res/values-es/strings.xml index 0b3955ac4b..f9d921745e 100644 --- a/src/android/app/src/main/res/values-es/strings.xml +++ b/src/android/app/src/main/res/values-es/strings.xml @@ -467,8 +467,6 @@ Controla el modo de la emulación de la GPU. La mayoría de los juegos se renderizan correctamente en los modos Rápido o Equilibrado, pero algunos requieren Preciso. Las partículas tienden a renderizarse correctamente solo con el modo Preciso. Precisión de DMA Controla la precisión de DMA. La precisión segura puede solucionar problemas en algunos juegos, pero también puede afectar al rendimiento en algunos casos. Si no está seguro, déjelo en Predeterminado. - Modo de ritmo de fotogramas - Controla cómo el emulador gestiona el ritmo de los fotogramas para reducir los tirones y hacer que la velocidad de los fotogramas sea más suave y consistente. Filtrado anisotrópico Mejora la calidad de las texturas al ser observadas desde ángulos oblicuos Modo de uso de VRAM @@ -993,13 +991,6 @@ Inseguro Seguro - - Automático - 30 FPS - 60 FPS - 90 FPS - 120 FPS - Sin compresión diff --git a/src/android/app/src/main/res/values-ru/strings.xml b/src/android/app/src/main/res/values-ru/strings.xml index 2a6c559cf2..718b2da696 100644 --- a/src/android/app/src/main/res/values-ru/strings.xml +++ b/src/android/app/src/main/res/values-ru/strings.xml @@ -468,8 +468,6 @@ Управляет режимом эмуляции графического процессора. Большинство игр нормально отображаются в режимах «Быстрый» или «Сбалансированный», но для некоторых требуется режим «Точный». Частицы обычно корректно отображаются только в режиме «Точный». Точность DMA Управляет точностью DMA. Безопасная точность может исправить проблемы в некоторых играх, но в некоторых случаях также может повлиять на производительность. Если не уверены, оставьте значение По умолчанию. - Режим синхронизации кадров - Управляет синхронизацией кадров в эмуляторе для уменьшения рывков и обеспечения более плавной и стабильной частоты кадров. Анизотропная фильтрация Улучшает качество текстур под углом Режим VRAM @@ -994,13 +992,6 @@ Небезопасно Безопасный - - Авто - 30 FPS - 60 FPS - 90 FPS - 120 FPS - Без сжатия diff --git a/src/android/app/src/main/res/values-uk/strings.xml b/src/android/app/src/main/res/values-uk/strings.xml index 1f78a85773..5f1dbd8e28 100644 --- a/src/android/app/src/main/res/values-uk/strings.xml +++ b/src/android/app/src/main/res/values-uk/strings.xml @@ -469,8 +469,6 @@ Керує режимом емуляції ГП. Більшість ігор добре візуалізуються з режимами «Швидко» або «Збалансовано», але деякі ігри можуть потребувати режиму «Точно». Частинки зазвичай правильно візуалізуються лише з режимом «Точно». Точність DMA Керує точністю DMA. Безпечна точність може виправити проблеми в деяких іграх, але в деяких випадках також може вплинути на продуктивність. Якщо не впевнені, залиште це значення за замовчуванням. - Режим виведення кадрів - Керує тим, як емулятор виконує виведення кадрів, щоб зменшити затримки й забезпечити плавнішу й стабільнішу частоту кадрів. Анізотропне фільтрування Покращує якість текстур під кутом. Режим використання VRAM @@ -995,13 +993,6 @@ Небезпечно Безпечно - - Автоматично - 30 к/с - 60 к/с - 90 к/с - 120 к/с - Без стиснення diff --git a/src/android/app/src/main/res/values-zh-rCN/strings.xml b/src/android/app/src/main/res/values-zh-rCN/strings.xml index bb5fe53596..beda261a2e 100644 --- a/src/android/app/src/main/res/values-zh-rCN/strings.xml +++ b/src/android/app/src/main/res/values-zh-rCN/strings.xml @@ -462,8 +462,6 @@ 控制 GPU 模拟的精确度。大部分游戏在性能或平衡模式下可以正常渲染,但部分游戏需要设置为精确。粒子效果通常只有在精确模式下才能正确显示。 DMA 精度 控制 DMA 精度。安全精度可以修复某些游戏中的问题,但在某些情况下也可能影响性能。如果不确定,请保留为“默认”。 - 帧同步模式 - 控制模拟器如何管理帧同步,以减少卡顿,使帧率表现更加平稳顺滑。 各向异性过滤 提高斜角的纹理质量 显存使用模式 @@ -988,13 +986,6 @@ 不安全 安全 - - 自动 - 30 FPS - 60 FPS - 90 FPS - 120 FPS - 不压缩 diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index 565decb390..9f787ab00c 100644 --- a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -533,20 +533,6 @@ 2 - - @string/frame_pacing_mode_target_Auto - @string/frame_pacing_mode_target_30 - @string/frame_pacing_mode_target_60 - @string/frame_pacing_mode_target_90 - @string/frame_pacing_mode_target_120 - - - 0 - 1 - 2 - 3 - 4 - @string/applet_hle diff --git a/src/android/app/src/main/res/values/colors.xml b/src/android/app/src/main/res/values/colors.xml index 7c3dd1a8c2..ad3412ed27 100644 --- a/src/android/app/src/main/res/values/colors.xml +++ b/src/android/app/src/main/res/values/colors.xml @@ -1 +1 @@ -#3cce5bff +#43fcfcff diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index 83c04ad40e..74c5bcd276 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -479,8 +479,6 @@ Controls the GPU emulation mode. Most games render fine with Fast or Balanced modes, but Accurate is still required for some. Particles tend to only render correctly with Accurate mode. DMA Accuracy Controls the DMA precision accuracy. Safe precision can fix issues in some games, but it can also impact performance in some cases. If unsure, leave this on Default. - Frame Pacing Mode - Controls how the emulator manages frame pacing to reduce stuttering and make the frame rate smoother and more consistent. Anisotropic filtering Improves the quality of textures when viewed at oblique angles VRAM Usage Mode @@ -1038,13 +1036,6 @@ Unsafe Safe - - Auto - 30 FPS - 60 FPS - 90 FPS - 120 FPS - CPU GPU @@ -1160,6 +1151,8 @@ Material You App Settings Theme And Color + Fullscreen mode + Hide Android system bars across app screens. Swipe from an edge to reveal them temporarily. Change theme mode @@ -1227,7 +1220,7 @@ Blue Cyan Red - Green (Default) + Green Yellow Orange Pink diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index 4f8cc7d1ca..e31fd2c5b9 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -222,10 +222,8 @@ if (MSVC) ) else() target_compile_options(audio_core PRIVATE - -Werror=conversion - - -Wno-sign-conversion - ) + $<$:-Werror=conversion> + $<$:-Wno-sign-conversion>) endif() target_include_directories(audio_core PRIVATE ${OPUS_INCLUDE_DIRS}) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index fc3d9b59d8..98c1688441 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -134,6 +134,8 @@ add_library( typed_address.h uint128.h unique_function.h + random.cpp + random.h uuid.cpp uuid.h vector_math.h @@ -225,8 +227,9 @@ else() endif() if(CXX_CLANG) - target_compile_options(common PRIVATE -fsized-deallocation - -Werror=unreachable-code-aggressive) + target_compile_options(common PRIVATE + $<$:-fsized-deallocation> + $<$:-Werror=unreachable-code-aggressive>) target_compile_definitions( common PRIVATE diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index d4b1709377..04f3a65778 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -30,6 +30,8 @@ #include #include #include +#elif defined(__FreeBSD__) +#include #endif // FreeBSD @@ -503,8 +505,7 @@ public: fd = shm_open_anon(O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, 0600); #elif defined(__OpenBSD__) fd = shm_open_anon(O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, 0600); -#elif defined(__FreeBSD__) && __FreeBSD__ < 13 - // XXX Drop after FreeBSD 12.* reaches EOL on 2024-06-30 +#elif defined(__FreeBSD__) fd = shm_open(SHM_ANON, O_RDWR, 0600); #elif defined(__APPLE__) // macOS doesn't have memfd_create, use anonymous temporary file @@ -571,9 +572,9 @@ public: if (True(perms & MemoryPermission::Execute)) prot_flags |= PROT_EXEC; #endif - int flags = (fd > 0 ? MAP_SHARED : MAP_PRIVATE) | MAP_FIXED; + int flags = (fd >= 0 ? MAP_SHARED : MAP_PRIVATE) | MAP_FIXED; void* ret = mmap(virtual_base + virtual_offset, length, prot_flags, flags, fd, host_offset); - ASSERT_MSG(ret != MAP_FAILED, "mmap: {}", strerror(errno)); + ASSERT_MSG(ret != MAP_FAILED, "mmap: {} {}", strerror(errno), fd); } void Unmap(size_t virtual_offset, size_t length) { @@ -587,9 +588,8 @@ public: auto [merged_pointer, merged_size] = free_manager.FreeBlock(virtual_base + virtual_offset, length); - void* ret = mmap(merged_pointer, merged_size, PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); + void* ret = mmap(merged_pointer, merged_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + ASSERT_MSG(ret != MAP_FAILED, "mmap: {}", strerror(errno)); } void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) { diff --git a/src/common/random.cpp b/src/common/random.cpp new file mode 100644 index 0000000000..d951881cd2 --- /dev/null +++ b/src/common/random.cpp @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include "common/random.h" + +namespace Common::Random { + [[nodiscard]] static std::random_device& GetGlobalRandomDevice() noexcept { + static std::random_device g_random_device{}; + return g_random_device; + } + [[nodiscard]] u32 Random32(u32 seed) noexcept { + return GetGlobalRandomDevice()(); + } + [[nodiscard]] u64 Random64(u64 seed) noexcept { + return GetGlobalRandomDevice()(); + } + [[nodiscard]] std::mt19937 GetMT19937() noexcept { + return std::mt19937(GetGlobalRandomDevice()()); + } +} diff --git a/src/common/random.h b/src/common/random.h new file mode 100644 index 0000000000..83210f6dc2 --- /dev/null +++ b/src/common/random.h @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include "common/common_types.h" + +namespace Common::Random { + [[nodiscard]] u32 Random32(u32 seed) noexcept; + [[nodiscard]] u64 Random64(u64 seed) noexcept; + [[nodiscard]] std::mt19937 GetMT19937() noexcept; +} diff --git a/src/common/settings.cpp b/src/common/settings.cpp index f549169cd2..54848c4dd1 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -186,7 +186,7 @@ bool IsFastmemEnabled() { // Only 4kb systems support host MMU right now // TODO: Support this return getpagesize() == 4096; -#elif !defined(__APPLE__) && !defined(__ANDROID__) && !defined(_WIN32) && !defined(__linux__) +#elif !defined(__APPLE__) && !defined(__ANDROID__) && !defined(_WIN32) && !defined(__linux__) && !defined(__FreeBSD__) return false; #else return true; diff --git a/src/common/tiny_mt.h b/src/common/tiny_mt.h index c9f9ed4a5d..4b556a33eb 100644 --- a/src/common/tiny_mt.h +++ b/src/common/tiny_mt.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -218,12 +218,6 @@ public: return t0; } - u64 GenerateRandomU64() { - const u32 lo = this->GenerateRandomU32(); - const u32 hi = this->GenerateRandomU32(); - return (u64{hi} << 32) | u64{lo}; - } - float GenerateRandomF32() { // Floats have 24 bits of mantissa. constexpr u32 MantissaBits = 24; diff --git a/src/common/uuid.cpp b/src/common/uuid.cpp index 8f0dba452c..d4a5733c26 100644 --- a/src/common/uuid.cpp +++ b/src/common/uuid.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -10,6 +13,7 @@ #include "common/assert.h" #include "common/tiny_mt.h" #include "common/uuid.h" +#include "common/random.h" namespace Common { @@ -175,21 +179,16 @@ u128 UUID::AsU128() const { } UUID UUID::MakeRandom() { - std::random_device device; - - return MakeRandomWithSeed(device()); + return MakeRandomWithSeed(Common::Random::Random32(0)); } UUID UUID::MakeRandomWithSeed(u32 seed) { // Create and initialize our RNG. TinyMT rng; rng.Initialize(seed); - UUID uuid; - // Populate the UUID with random bytes. rng.GenerateRandomBytes(uuid.uuid.data(), sizeof(UUID)); - return uuid; } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 7566372b51..08a2d0e2db 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1195,13 +1195,13 @@ if (MSVC) ) else() target_compile_options(core PRIVATE - -Werror=conversion - -Wno-sign-conversion - -Wno-cast-function-type + $<$:-Werror=conversion> + $<$:-Wno-sign-conversion> + $<$:-Wno-cast-function-type> $<$:-fsized-deallocation>) # pre-clang19 will spam with "OH DID YOU MEAN THIS?" otherwise... if (CXX_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 19) - target_compile_options(core PRIVATE -Wno-cast-function-type-mismatch) + target_compile_options(core PRIVATE $<$:-Wno-cast-function-type-mismatch>) endif() endif() diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 912a15475b..1520f128cf 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -186,7 +186,7 @@ void ArmDynarmic32::MakeJit(Common::PageTable* page_table) { config.only_detect_misalignment_via_page_table_on_page_boundary = true; config.fastmem_pointer = page_table->fastmem_arena ? - std::optional{reinterpret_cast(page_table->fastmem_arena)} : + std::optional{uintptr_t(page_table->fastmem_arena)} : std::nullopt; config.fastmem_exclusive_access = config.fastmem_pointer != std::nullopt; @@ -286,10 +286,6 @@ void ArmDynarmic32::MakeJit(Common::PageTable* page_table) { // Curated optimizations case Settings::CpuAccuracy::Auto: config.unsafe_optimizations = true; -#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__sun__) || defined(__HAIKU__) || defined(__DragonFly__) || defined(__NetBSD__) - config.fastmem_pointer = std::nullopt; - config.fastmem_exclusive_access = false; -#endif config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; @@ -304,6 +300,10 @@ void ArmDynarmic32::MakeJit(Common::PageTable* page_table) { default: break; } + if (!Settings::IsFastmemEnabled()) { + config.fastmem_pointer = std::nullopt; + config.fastmem_exclusive_access = false; + } m_jit.emplace(config); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 55584d0e38..fe7fb5983f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -338,10 +338,6 @@ void ArmDynarmic64::MakeJit(Common::PageTable* page_table, std::size_t address_s // Safe optimisations case Settings::CpuAccuracy::Auto: config.unsafe_optimizations = true; -#if !defined(__APPLE__) && !defined(__linux__) && !defined(__ANDROID__) && !defined(_WIN32) - config.fastmem_pointer = std::nullopt; - config.fastmem_exclusive_access = false; -#endif config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; config.fastmem_address_space_bits = 64; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor; @@ -355,6 +351,10 @@ void ArmDynarmic64::MakeJit(Common::PageTable* page_table, std::size_t address_s default: break; } + if (!Settings::IsFastmemEnabled()) { + config.fastmem_pointer = std::nullopt; + config.fastmem_exclusive_access = false; + } m_jit.emplace(config); } diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp index be6fee8613..077a696cc8 100644 --- a/src/core/arm/nce/interpreter_visitor.cpp +++ b/src/core/arm/nce/interpreter_visitor.cpp @@ -773,7 +773,11 @@ std::optional MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m bool was_executed = false; auto decoder = Dynarmic::A64::Decode(instruction); - was_executed = decoder.get().call(visitor, instruction); + if (decoder) { + was_executed = decoder->get().call(visitor, instruction); + } else { + was_executed = false; + } return was_executed ? std::optional(pc + 4) : std::nullopt; } diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index 61671ea333..af41820a36 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -12,6 +12,7 @@ #include "common/fs/path_util.h" #include "common/hex_util.h" #include "common/logging.h" +#include "common/random.h" #include "common/string_util.h" #include "core/crypto/key_manager.h" #include "core/file_sys/card_image.h" @@ -490,17 +491,13 @@ std::vector PlaceholderCache::List() const { } NcaID PlaceholderCache::Generate() { - std::random_device device; - std::mt19937 gen(device()); + auto gen = Common::Random::GetMT19937(); std::uniform_int_distribution distribution(1, (std::numeric_limits::max)()); - NcaID out{}; - const auto v1 = distribution(gen); const auto v2 = distribution(gen); std::memcpy(out.data(), &v1, sizeof(u64)); std::memcpy(out.data() + sizeof(u64), &v2, sizeof(u64)); - return out; } diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp index 1446653916..3343d1d282 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -7,6 +7,7 @@ #include #include "common/literals.h" +#include "common/random.h" #include "common/settings.h" #include "core/hle/kernel/board/nintendo/nx/k_system_control.h" @@ -201,15 +202,8 @@ u64 GenerateUniformRange(u64 min, u64 max, F f) { } // Anonymous namespace -u64 KSystemControl::GenerateRandomU64() { - std::random_device device; - std::mt19937 gen(device()); - std::uniform_int_distribution distribution(1, (std::numeric_limits::max)()); - return distribution(gen); -} - u64 KSystemControl::GenerateRandomRange(u64 min, u64 max) { - return GenerateUniformRange(min, max, GenerateRandomU64); + return GenerateUniformRange(min, max, Common::Random::GetMT19937()); } size_t KSystemControl::CalculateRequiredSecureMemorySize(size_t size, u32 pool) { diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h index 60c5e58b73..41a25ba1c8 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -33,7 +36,6 @@ public: // Randomness. static u64 GenerateRandomRange(u64 min, u64 max); - static u64 GenerateRandomU64(); // Secure Memory. static size_t CalculateRequiredSecureMemorySize(size_t size, u32 pool); diff --git a/src/core/hle/kernel/k_page_bitmap.h b/src/core/hle/kernel/k_page_bitmap.h index fc21b81574..27bd682c5c 100644 --- a/src/core/hle/kernel/k_page_bitmap.h +++ b/src/core/hle/kernel/k_page_bitmap.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -14,6 +14,7 @@ #include "common/bit_util.h" #include "common/common_types.h" #include "common/tiny_mt.h" +#include "common/random.h" #include "core/hle/kernel/k_system_control.h" namespace Kernel { @@ -23,7 +24,7 @@ public: class RandomBitGenerator { public: RandomBitGenerator() { - m_rng.Initialize(static_cast(KSystemControl::GenerateRandomU64())); + m_rng.Initialize(u32(Common::Random::Random64(0))); } u64 SelectRandomBit(u64 bitmap) { diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 17bdb7b6fa..ea9b7eb114 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -20,6 +20,7 @@ #include "common/fiber.h" #include "common/logging.h" #include "common/settings.h" +#include "common/random.h" #include "core/core.h" #include "core/cpu_manager.h" #include "core/hardware_properties.h" @@ -45,8 +46,7 @@ namespace { constexpr inline s32 TerminatingThreadPriority = Kernel::Svc::SystemThreadPriorityHighest - 1; -static void ResetThreadContext32(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point, - u64 arg) { +static void ResetThreadContext32(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point, u64 arg) { ctx = {}; ctx.r[0] = arg; ctx.r[15] = entry_point; @@ -55,11 +55,10 @@ static void ResetThreadContext32(Kernel::Svc::ThreadContext& ctx, u64 stack_top, ctx.fpsr = 0; } -static void ResetThreadContext64(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point, - u64 arg) { +static void ResetThreadContext64(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point, u64 arg) { ctx = {}; ctx.r[0] = arg; - ctx.r[18] = Kernel::KSystemControl::GenerateRandomU64() | 1; + ctx.r[18] = Common::Random::Random64(0) | 1; ctx.pc = entry_point; ctx.sp = stack_top; ctx.fpcr = 0; diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index d8767f9fed..43c569851d 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -712,9 +712,8 @@ public: private: void CheckAvailability(HLERequestContext& ctx) { LOG_DEBUG(Service_ACC, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 3}; + IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); - rb.Push(false); // TODO: Check when this is supposed to return true and when not } void GetAccountId(HLERequestContext& ctx) { diff --git a/src/core/hle/service/cmif_serialization.h b/src/core/hle/service/cmif_serialization.h index 4d32c6cd6b..75461cc6be 100644 --- a/src/core/hle/service/cmif_serialization.h +++ b/src/core/hle/service/cmif_serialization.h @@ -438,20 +438,20 @@ void WriteOutArgument(bool is_domain, CallArguments& args, u8* raw_data, HLERequ template void CmifReplyWrapImpl(HLERequestContext& ctx, T& t, Result (T::*f)(A...)) { + const auto mgr = ctx.GetManager().get(); // Verify domain state. if constexpr (!Domain) { - const auto _mgr = ctx.GetManager(); - const bool _is_domain = _mgr ? _mgr->IsDomain() : false; - ASSERT_MSG(!_is_domain, - "Non-domain reply used on domain session\n" - "Service={} (TIPC={} CmdType={} Cmd=0x{:08X}\n" - "HasDomainHeader={} DomainHandlers={}\nDesc={}", - t.GetServiceName(), ctx.IsTipc(), - static_cast(ctx.GetCommandType()), static_cast(ctx.GetCommand()), - ctx.HasDomainMessageHeader(), _mgr ? static_cast(_mgr->DomainHandlerCount()) : 0u, - ctx.Description()); + const bool is_domain = mgr ? mgr->IsDomain() : false; + ASSERT_MSG(!is_domain, + "Non-domain reply used on domain session\n" + "Service={} (TIPC={} CmdType={} Cmd=0x{:08X}\n" + "HasDomainHeader={} DomainHandlers={}\nDesc={}", + t.GetServiceName(), ctx.IsTipc(), + u32(ctx.GetCommandType()), u32(ctx.GetCommand()), + ctx.HasDomainMessageHeader(), mgr ? u32(mgr->DomainHandlerCount()) : 0u, + ctx.Description()); } - const bool is_domain = Domain ? ctx.GetManager()->IsDomain() : false; + const bool is_domain = Domain ? mgr->IsDomain() : false; static_assert(ConstIfReference(), "Arguments taken by reference must be const"); using MethodArguments = std::tuple...>; diff --git a/src/core/hle/service/ipc_helpers.h b/src/core/hle/service/ipc_helpers.h index 4b02872fba..8aee17db8d 100644 --- a/src/core/hle/service/ipc_helpers.h +++ b/src/core/hle/service/ipc_helpers.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2016 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -78,32 +81,29 @@ public: memset(cmdbuf, 0, sizeof(u32) * IPC::COMMAND_BUFFER_LENGTH); IPC::CommandHeader header{}; + auto const mgr = ctx.GetManager().get(); // The entire size of the raw data section in u32 units, including the 16 bytes of mandatory // padding. - u32 raw_data_size = ctx.write_size = - ctx.IsTipc() ? normal_params_size - 1 : normal_params_size; + u32 raw_data_size = ctx.write_size = ctx.IsTipc() ? normal_params_size - 1 : normal_params_size; u32 num_handles_to_move{}; u32 num_domain_objects{}; - const bool always_move_handles{ - (static_cast(flags) & static_cast(Flags::AlwaysMoveHandles)) != 0}; - if (!ctx.GetManager()->IsDomain() || always_move_handles) { + const bool always_move_handles = (u32(flags) & u32(Flags::AlwaysMoveHandles)) != 0; + if (!mgr->IsDomain() || always_move_handles) { num_handles_to_move = num_objects_to_move; } else { num_domain_objects = num_objects_to_move; } - if (ctx.GetManager()->IsDomain()) { - raw_data_size += - static_cast(sizeof(DomainMessageHeader) / sizeof(u32) + num_domain_objects); + if (mgr->IsDomain()) { + raw_data_size += u32(sizeof(DomainMessageHeader) / sizeof(u32) + num_domain_objects); ctx.write_size += num_domain_objects; } if (ctx.IsTipc()) { header.type.Assign(ctx.GetCommandType()); } else { - raw_data_size += static_cast(sizeof(IPC::DataPayloadHeader) / sizeof(u32) + 4 + - normal_params_size); + raw_data_size += u32(sizeof(IPC::DataPayloadHeader) / sizeof(u32) + 4 + normal_params_size); } header.data_size.Assign(raw_data_size); @@ -126,7 +126,7 @@ public: if (!ctx.IsTipc()) { AlignWithPadding(); - if (ctx.GetManager()->IsDomain() && ctx.HasDomainMessageHeader()) { + if (mgr->IsDomain() && ctx.HasDomainMessageHeader()) { IPC::DomainMessageHeader domain_header{}; domain_header.num_objects = num_domain_objects; PushRaw(domain_header); diff --git a/src/core/hle/service/mii/mii_util.h b/src/core/hle/service/mii/mii_util.h index 3534fa31d5..2ef006765c 100644 --- a/src/core/hle/service/mii/mii_util.h +++ b/src/core/hle/service/mii/mii_util.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -7,6 +10,7 @@ #include #include "common/common_types.h" +#include "common/random.h" #include "common/swap.h" #include "common/uuid.h" #include "core/hle/service/mii/mii_types.h" @@ -65,11 +69,9 @@ public: template static T GetRandomValue(T min, T max) { - std::random_device device; - std::mt19937 gen(device()); - std::uniform_int_distribution distribution(static_cast(min), - static_cast(max)); - return static_cast(distribution(gen)); + std::uniform_int_distribution distribution{u64(min), u64(max)}; + auto gen = Common::Random::GetMT19937(); + return T(distribution(gen)); } template diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 3ce8eb4eef..98fcaa3220 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -183,8 +183,8 @@ struct NifmWirelessSettingData { static_assert(sizeof(NifmWirelessSettingData) == 0x70, "NifmWirelessSettingData has incorrect size."); -#pragma pack(push, 1) // This is nn::nifm::detail::sf::NetworkProfileData +#pragma pack(push, 1) struct SfNetworkProfileData { IpSettingData ip_setting_data{}; u128 uuid{}; @@ -196,9 +196,11 @@ struct SfNetworkProfileData { SfWirelessSettingData wireless_setting_data{}; INSERT_PADDING_BYTES(1); }; +#pragma pack(pop) static_assert(sizeof(SfNetworkProfileData) == 0x17C, "SfNetworkProfileData has incorrect size."); // This is nn::nifm::NetworkProfileData +#pragma pack(push, 1) struct NifmNetworkProfileData { u128 uuid{}; std::array network_name{}; diff --git a/src/core/hle/service/sockets/sfdnsres.cpp b/src/core/hle/service/sockets/sfdnsres.cpp index 68d73f0a59..f9f0ee56eb 100644 --- a/src/core/hle/service/sockets/sfdnsres.cpp +++ b/src/core/hle/service/sockets/sfdnsres.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project @@ -53,12 +53,15 @@ enum class NetDbError : s32 { NoData = 4, }; -static const constexpr std::array blockedDomains = {"srv.nintendo.net", - "battle.net", - "microsoft.com", - "mojang.com", - "xboxlive.com", - "minecraftservices.com"}; +static const constexpr std::array blockedDomains = { + "srv.nintendo.net", //obvious + "phoenix-api.wbagora.com", //hogwarts legacy + "battle.net", + "microsoft.com", //minecraft dungeons + other games + "mojang.com", + "xboxlive.com", + "minecraftservices.com" +}; static bool IsBlockedHost(const std::string& host) { return std::any_of( diff --git a/src/core/hle/service/vi/shared_buffer_manager.cpp b/src/core/hle/service/vi/shared_buffer_manager.cpp index 1568344830..21e42e2094 100644 --- a/src/core/hle/service/vi/shared_buffer_manager.cpp +++ b/src/core/hle/service/vi/shared_buffer_manager.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -373,6 +373,8 @@ Result SharedBufferManager::PresentSharedFrameBuffer(android::Fence fence, android::Status::NoError, VI::ResultOperationFailed); + (void)m_container.SetLayerZIndex(layer_id, 100000); + // We succeeded. R_SUCCEED(); } diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 4e0b119f21..2ea63e137e 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -6,6 +6,7 @@ #include #include "common/logging.h" +#include "common/random.h" #include "common/settings.h" #include "core/core.h" #include "core/file_sys/content_archive.h" @@ -229,7 +230,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect // TODO: this is bad form of ASLR, it sucks size_t aslr_offset = ((::Settings::values.rng_seed_enabled.GetValue() ? ::Settings::values.rng_seed.GetValue() - : std::rand()) * 0x734287f27) & 0xfff000; + : Common::Random::Random64(0)) * 0x734287f27) & 0xfff000; // Setup the process code layout if (process.LoadFromMetadata(metadata, code_size, fastmem_base, aslr_offset, is_hbl).IsError()) { diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index 97fe891c43..3a16c369b1 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp @@ -6,6 +6,7 @@ #include #include "common/settings.h" +#include "common/random.h" #include "core/file_sys/kernel_executable.h" #include "core/file_sys/program_metadata.h" #include "core/hle/kernel/code_set.h" @@ -88,7 +89,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::KProcess& process, // TODO: this is bad form of ASLR, it sucks size_t aslr_offset = ((::Settings::values.rng_seed_enabled.GetValue() ? ::Settings::values.rng_seed.GetValue() - : std::rand()) * 0x734287f27) & 0xfff000; + : Common::Random::Random64(0)) * 0x734287f27) & 0xfff000; // Setup the process code layout if (process.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), codeset.memory.size(), 0, aslr_offset, false).IsError()) { diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 76e784fe15..70178cbb9b 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "common/logging.h" #include "common/settings.h" +#include "common/random.h" #include "common/swap.h" #include "core/core.h" #include "core/file_sys/control_metadata.h" @@ -243,7 +244,7 @@ static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, // TODO: this is bad form of ASLR, it sucks size_t aslr_offset = ((::Settings::values.rng_seed_enabled.GetValue() ? ::Settings::values.rng_seed.GetValue() - : std::rand()) * 0x734287f27) & 0xfff000; + : Common::Random::Random64(0)) * 0x734287f27) & 0xfff000; // Setup the process code layout if (process diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 5c57df424c..3a9ea308a8 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -45,11 +45,7 @@ static inline bool AddressSpaceContains(const Common::PageTable& table, const Co // from outside classes. This also allows modification to the internals of the memory // subsystem without needing to rebuild all files that make use of the memory interface. struct Memory::Impl { - explicit Impl(Core::System& system_) : system{system_} { - // Initialize thread count based on available cores for parallel memory operations - const unsigned int hw_concurrency = std::thread::hardware_concurrency(); - thread_count = (std::max)(2u, (std::min)(hw_concurrency, 8u)); // Limit to 8 threads max - } + explicit Impl(Core::System& system_) : system{system_} {} void SetCurrentPageTable(Kernel::KProcess& process) { current_page_table = &process.GetPageTable().GetImpl(); @@ -856,13 +852,7 @@ struct Memory::Impl { Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{}; Common::PageTable* current_page_table = nullptr; - // Number of threads to use for parallel memory operations - unsigned int thread_count = 2; - - // Minimum size in bytes for which parallel processing is beneficial - //size_t PARALLEL_THRESHOLD = (L3 CACHE * NUM PHYSICAL CORES); // 64 KB - std::array - rasterizer_read_areas{}; + std::array rasterizer_read_areas{}; std::array rasterizer_write_areas{}; std::array, Core::Hardware::NUM_CPU_CORES> scratch_buffers{}; std::span gpu_dirty_managers; diff --git a/src/core/tools/renderdoc.cpp b/src/core/tools/renderdoc.cpp index 5fab291fe5..e49dac64a0 100644 --- a/src/core/tools/renderdoc.cpp +++ b/src/core/tools/renderdoc.cpp @@ -51,10 +51,12 @@ void RenderdocAPI::ToggleCapture() { if (!rdoc_api) [[unlikely]] { return; } + + auto* api = static_cast(rdoc_api); if (!is_capturing) { - rdoc_api->StartFrameCapture(NULL, NULL); + api->StartFrameCapture(NULL, NULL); } else { - rdoc_api->EndFrameCapture(NULL, NULL); + api->EndFrameCapture(NULL, NULL); } is_capturing = !is_capturing; } diff --git a/src/core/tools/renderdoc.h b/src/core/tools/renderdoc.h index 0e5e43da5b..1f72bef320 100644 --- a/src/core/tools/renderdoc.h +++ b/src/core/tools/renderdoc.h @@ -1,10 +1,11 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once -struct RENDERDOC_API_1_6_0; - namespace Tools { class RenderdocAPI { @@ -15,7 +16,7 @@ public: void ToggleCapture(); private: - RENDERDOC_API_1_6_0* rdoc_api{}; + void* rdoc_api{}; bool is_capturing{false}; }; diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index e0d1f942f0..dd1def5273 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -78,7 +78,9 @@ if (MSVC) /Zc:throwingNew # Assumes new (without std::nothrow) never returns null. /volatile:iso # Use strict standard-abiding volatile semantics /bigobj # Increase number of sections in .obj files - /DNOMINMAX) + /DNOMINMAX + /GR- + ) if (CXX_CLANG) list(APPEND DYNARMIC_CXX_FLAGS @@ -91,8 +93,10 @@ else() -Wextra -Wcast-qual -pedantic - -Wno-missing-braces) - + -Wno-missing-braces + -fno-rtti + #-fno-exceptions + ) if (CXX_GCC) # GCC produces bogus -Warray-bounds warnings from xbyak headers for code paths that are not # actually reachable. Specifically, it happens in cases where some code casts an Operand& @@ -102,7 +106,6 @@ else() list(APPEND DYNARMIC_CXX_FLAGS -Wno-array-bounds) list(APPEND DYNARMIC_CXX_FLAGS -Wstack-usage=4096) endif() - if (CXX_CLANG) # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. # And this in turns limits the size of a std::array. @@ -117,7 +120,6 @@ if (NOT Boost_FOUND) endif() find_package(fmt 8 CONFIG) -find_package(mcl 0.1.12 REQUIRED) find_package(unordered_dense REQUIRED) if ("arm64" IN_LIST ARCHITECTURE OR DYNARMIC_TESTS) diff --git a/src/dynarmic/CMakeModules/impl/TargetArchitectureSpecificSourcesWrapFile.cmake b/src/dynarmic/CMakeModules/impl/TargetArchitectureSpecificSourcesWrapFile.cmake index 82b3078a11..82c6c3bae0 100644 --- a/src/dynarmic/CMakeModules/impl/TargetArchitectureSpecificSourcesWrapFile.cmake +++ b/src/dynarmic/CMakeModules/impl/TargetArchitectureSpecificSourcesWrapFile.cmake @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + string(TOUPPER "${arch}" arch) file(READ "${input_file}" f_contents) -file(WRITE "${output_file}" "#include \n#if defined(MCL_ARCHITECTURE_${arch})\n${f_contents}\n#endif\n") +file(WRITE "${output_file}" "#if defined(ARCHITECTURE_${arch})\n${f_contents}\n#endif\n") diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 3d2ea4d42e..f79d18c15a 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -3,6 +3,12 @@ include(TargetArchitectureSpecificSources) add_library(dynarmic STATIC + mcl/bit.hpp + mcl/function_info.hpp + mcl/integer_of_size.hpp + mcl/intrusive_list.hpp + mcl/is_instance_of_template.hpp + backend/block_range_information.cpp backend/block_range_information.h backend/exception_handler.h @@ -53,7 +59,6 @@ add_library(dynarmic STATIC common/fp/util.h common/llvm_disassemble.cpp common/llvm_disassemble.h - common/lut_from_list.h common/math_util.cpp common/math_util.h common/safe_ops.h @@ -69,6 +74,7 @@ add_library(dynarmic STATIC frontend/decoder/matcher.h frontend/imm.cpp frontend/imm.h + interface/halt_reason.h interface/exclusive_monitor.h interface/optimization_flags.h ir/acc_type.h @@ -353,7 +359,7 @@ set_target_properties(dynarmic PROPERTIES target_compile_options(dynarmic PRIVATE ${DYNARMIC_CXX_FLAGS}) target_link_libraries(dynarmic PRIVATE unordered_dense::unordered_dense) -target_link_libraries(dynarmic PUBLIC fmt::fmt merry::mcl) +target_link_libraries(dynarmic PUBLIC fmt::fmt) if (BOOST_NO_HEADERS) target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_interface.cpp index 91adc5783b..d16a34275b 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_interface.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -11,7 +11,6 @@ #include #include "dynarmic/common/assert.h" -#include #include "dynarmic/common/common_types.h" #include "dynarmic/backend/arm64/a32_address_space.h" @@ -36,14 +35,9 @@ struct Jit::Impl final { PerformRequestedCacheInvalidation(static_cast(Atomic::Load(&halt_reason))); jit_interface->is_executing = true; - SCOPE_EXIT { - jit_interface->is_executing = false; - }; - HaltReason hr = core.Run(current_address_space, current_state, &halt_reason); - PerformRequestedCacheInvalidation(hr); - + jit_interface->is_executing = false; return hr; } @@ -52,14 +46,9 @@ struct Jit::Impl final { PerformRequestedCacheInvalidation(static_cast(Atomic::Load(&halt_reason))); jit_interface->is_executing = true; - SCOPE_EXIT { - jit_interface->is_executing = false; - }; - HaltReason hr = core.Step(current_address_space, current_state, &halt_reason); - PerformRequestedCacheInvalidation(hr); - + jit_interface->is_executing = false; return hr; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_jitstate.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_jitstate.cpp index 67390a311d..0ce51d0e17 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_jitstate.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_jitstate.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,7 +8,7 @@ #include "dynarmic/backend/arm64/a32_jitstate.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" namespace Dynarmic::Backend::Arm64 { diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_interface.cpp index 10324394a5..b230f455c5 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_interface.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -11,7 +11,6 @@ #include #include "dynarmic/common/assert.h" -#include #include "dynarmic/common/common_types.h" #include "dynarmic/backend/arm64/a64_address_space.h" @@ -34,32 +33,20 @@ struct Jit::Impl final { HaltReason Run() { ASSERT(!is_executing); PerformRequestedCacheInvalidation(static_cast(Atomic::Load(&halt_reason))); - is_executing = true; - SCOPE_EXIT { - is_executing = false; - }; - HaltReason hr = core.Run(current_address_space, current_state, &halt_reason); - PerformRequestedCacheInvalidation(hr); - + is_executing = false; return hr; } HaltReason Step() { ASSERT(!is_executing); PerformRequestedCacheInvalidation(static_cast(Atomic::Load(&halt_reason))); - is_executing = true; - SCOPE_EXIT { - is_executing = false; - }; - HaltReason hr = core.Step(current_address_space, current_state, &halt_reason); - PerformRequestedCacheInvalidation(hr); - + is_executing = false; return hr; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/abi.cpp b/src/dynarmic/src/dynarmic/backend/arm64/abi.cpp index 04d8ca2eaf..acff6a89f0 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/abi.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,7 +10,7 @@ #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include diff --git a/src/dynarmic/src/dynarmic/backend/arm64/abi.h b/src/dynarmic/src/dynarmic/backend/arm64/abi.h index f2b6b1dca1..b2e29d49e0 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/abi.h +++ b/src/dynarmic/src/dynarmic/backend/arm64/abi.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -12,7 +12,6 @@ #include #include -#include #include "dynarmic/common/common_types.h" #include "dynarmic/common/assert.h" #include @@ -37,7 +36,7 @@ constexpr auto Rscratch0() { } else if constexpr (bitsize == 64) { return Xscratch0; } else { - static_assert(Common::always_false_v>); + return Xscratch0; //UNREACHABLE(); } } @@ -48,7 +47,7 @@ constexpr auto Rscratch1() { } else if constexpr (bitsize == 64) { return Xscratch1; } else { - static_assert(Common::always_false_v>); + return Xscratch1; //UNREACHABLE(); } } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/devirtualize.h b/src/dynarmic/src/dynarmic/backend/arm64/devirtualize.h index 60ce823066..737a6572e3 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/devirtualize.h +++ b/src/dynarmic/src/dynarmic/backend/arm64/devirtualize.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,7 +10,7 @@ #include #include "dynarmic/common/common_types.h" -#include +#include "dynarmic/mcl/function_info.hpp" namespace Dynarmic::Backend::Arm64 { diff --git a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_a32.cpp b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_a32.cpp index a65efb3c59..213403b4ba 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_a32.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_a32.cpp @@ -6,7 +6,7 @@ * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include #include "dynarmic/backend/arm64/a32_jitstate.h" diff --git a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector.cpp b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector.cpp index a8c3aa02bf..c773d5a339 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -6,7 +6,6 @@ * SPDX-License-Identifier: 0BSD */ -#include #include #include "dynarmic/backend/arm64/a32_jitstate.h" @@ -46,7 +45,7 @@ static void EmitTwoOpArranged(oaknut::CodeGenerator& code, EmitContext& ctx, IR: } else if constexpr (size == 64) { emit(Qresult->D2(), Qoperand->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -69,7 +68,7 @@ static void EmitTwoOpArrangedWiden(oaknut::CodeGenerator& code, EmitContext& ctx } else if constexpr (size == 32) { emit(Qresult->D2(), Qoperand->toD().S2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -84,7 +83,7 @@ static void EmitTwoOpArrangedNarrow(oaknut::CodeGenerator& code, EmitContext& ct } else if constexpr (size == 64) { emit(Qresult->toD().S2(), Qoperand->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -107,7 +106,7 @@ static void EmitTwoOpArrangedPairWiden(oaknut::CodeGenerator& code, EmitContext& } else if constexpr (size == 32) { emit(Qresult->D2(), Qoperand->S4()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -122,7 +121,7 @@ static void EmitTwoOpArrangedLower(oaknut::CodeGenerator& code, EmitContext& ctx } else if constexpr (size == 32) { emit(Qresult->toD().S2(), Qoperand->toD().S2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -150,7 +149,7 @@ static void EmitThreeOpArranged(oaknut::CodeGenerator& code, EmitContext& ctx, I } else if constexpr (size == 64) { emit(Qresult->D2(), Qa->D2(), Qb->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -175,7 +174,7 @@ static void EmitThreeOpArrangedWiden(oaknut::CodeGenerator& code, EmitContext& c } else if constexpr (size == 64) { emit(Qresult->Q1(), Qa->toD().D1(), Qb->toD().D1()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -198,7 +197,7 @@ static void EmitThreeOpArrangedLower(oaknut::CodeGenerator& code, EmitContext& c } else if constexpr (size == 32) { emit(Qresult->toD().S2(), Qa->toD().S2(), Qb->toD().S2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -220,7 +219,7 @@ static void EmitSaturatedAccumulate(oaknut::CodeGenerator&, EmitContext& ctx, IR } else if constexpr (size == 64) { emit(Qaccumulator->D2(), Qoperand->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } @@ -241,7 +240,7 @@ static void EmitImmShift(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* ins } else if constexpr (size == 64) { emit(Qresult->D2(), Qoperand->D2(), shift_amount); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } @@ -269,7 +268,7 @@ static void EmitReduce(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, } else if constexpr (size == 64) { emit(Vresult, Qoperand->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp index 4d11c62abd..557d6284ed 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -7,14 +7,8 @@ */ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include "dynarmic/mcl/function_info.hpp" +#include "dynarmic/mcl/integer_of_size.hpp" #include #include "dynarmic/backend/arm64/a32_jitstate.h" @@ -31,7 +25,6 @@ #include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/op.h" #include "dynarmic/common/fp/rounding_mode.h" -#include "dynarmic/common/lut_from_list.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -39,8 +32,6 @@ namespace Dynarmic::Backend::Arm64 { using namespace oaknut::util; -namespace mp = mcl::mp; - using A64FullVectorWidth = std::integral_constant; // Array alias that always sizes itself according to the given type T @@ -84,7 +75,7 @@ static void EmitTwoOpArranged(oaknut::CodeGenerator& code, EmitContext& ctx, IR: } else if constexpr (size == 64) { emit(Qresult->D2(), Qa->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -112,7 +103,7 @@ static void EmitThreeOpArranged(oaknut::CodeGenerator& code, EmitContext& ctx, I } else if constexpr (size == 64) { emit(Qresult->D2(), Qa->D2(), Qb->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -135,7 +126,7 @@ static void EmitFMA(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* ins } else if constexpr (size == 64) { emit(Qresult->D2(), Qm->D2(), Qn->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -157,7 +148,7 @@ static void EmitFromFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Ins } else if constexpr (size == 64) { emit(Qto->D2(), Qfrom->D2(), fbits); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }); } @@ -179,7 +170,7 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) } else if constexpr (fsize == 64) { return Qto->D2(); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }(); auto Vfrom = [&] { @@ -188,7 +179,7 @@ void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) } else if constexpr (fsize == 64) { return Qfrom->D2(); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } }(); diff --git a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_saturation.cpp b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_saturation.cpp index 722a09176b..d63c1d92d0 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_saturation.cpp @@ -1,9 +1,11 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD */ -#include #include #include "dynarmic/backend/arm64/a32_jitstate.h" @@ -39,7 +41,7 @@ static void Emit(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitF } else if constexpr (size == 64) { emit(Qresult->D2(), Qa->D2(), Qb->D2()); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/fastmem.h b/src/dynarmic/src/dynarmic/backend/arm64/fastmem.h index cae05bcf23..8e40e81569 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/fastmem.h +++ b/src/dynarmic/src/dynarmic/backend/arm64/fastmem.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,11 +10,10 @@ #include #include - -#include -#include "dynarmic/common/common_types.h" #include +#include "dynarmic/mcl/bit.hpp" +#include "dynarmic/common/common_types.h" #include "dynarmic/backend/exception_handler.h" #include "dynarmic/ir/location_descriptor.h" @@ -22,9 +21,16 @@ namespace Dynarmic::Backend::Arm64 { using DoNotFastmemMarker = std::tuple; +constexpr size_t xmrx(size_t x) noexcept { + x ^= x >> 32; + x *= 0xff51afd7ed558ccd; + x ^= mcl::bit::rotate_right(x, 47) ^ mcl::bit::rotate_right(x, 23); + return x; +} + struct DoNotFastmemMarkerHash { - size_t operator()(const DoNotFastmemMarker& value) const { - return mcl::hash::xmrx(std::get<0>(value).Value() ^ static_cast(std::get<1>(value))); + [[nodiscard]] size_t operator()(const DoNotFastmemMarker& value) const noexcept { + return xmrx(std::get<0>(value).Value() ^ u64(std::get<1>(value))); } }; diff --git a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp index e98a2f6e71..a92648cd44 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -13,9 +13,8 @@ #include #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include -#include #include "dynarmic/common/common_types.h" #include "dynarmic/backend/arm64/abi.h" @@ -299,7 +298,7 @@ int RegAlloc::GenerateImmediate(const IR::Value& value) { return 0; } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } @@ -317,8 +316,8 @@ int RegAlloc::RealizeReadImpl(const IR::Value& value) { return current_location->index; } - ASSERT(!ValueInfo(*current_location).realized); - ASSERT(ValueInfo(*current_location).locked); + ASSERT(!bool(ValueInfo(*current_location).realized)); + ASSERT(bool(ValueInfo(*current_location).locked)); if constexpr (required_kind == HostLoc::Kind::Gpr) { const int new_location_index = AllocateRegister(gprs, gpr_order); @@ -366,7 +365,7 @@ int RegAlloc::RealizeReadImpl(const IR::Value& value) { } else if constexpr (required_kind == HostLoc::Kind::Flags) { UNREACHABLE(); //A simple read from flags is likely a logic error } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } @@ -390,7 +389,7 @@ int RegAlloc::RealizeWriteImpl(const IR::Inst* value) { flags.SetupLocation(value); return 0; } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } @@ -410,7 +409,7 @@ int RegAlloc::RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* } else if constexpr (kind == HostLoc::Kind::Flags) { ASSERT(false && "Incorrect function for ReadWrite of flags"); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h index 87446a240b..22ab5af662 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h @@ -16,7 +16,7 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" -#include +#include "dynarmic/mcl/is_instance_of_template.hpp" #include #include diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index cd274b111f..ff116c5775 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -12,7 +12,6 @@ #include #include -#include #include "dynarmic/common/common_types.h" #if defined(ARCHITECTURE_x86_64) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp index 622f86816f..be44207f0a 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -16,11 +16,10 @@ #include #include #include +#include #include #include "dynarmic/common/assert.h" -#include -#include #include "dynarmic/common/common_types.h" #include "dynarmic/backend/exception_handler.h" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c index 25678ab115..ebbe5a079f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c @@ -3,8 +3,6 @@ * SPDX-License-Identifier: 0BSD */ -#include - #if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/mig/mach_exc_server.c" #elif defined(ARCHITECTURE_arm64) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp index 58a3325e17..8b52437930 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -6,8 +6,6 @@ * SPDX-License-Identifier: 0BSD */ -#include - #if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/exception_handler_windows.cpp" #elif defined(ARCHITECTURE_arm64) diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index 05f43774b6..edb24761f6 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -99,7 +99,7 @@ void A32AddressSpace::SetCursorPtr(CodePtr ptr) { } size_t A32AddressSpace::GetRemainingSize() { - return conf.code_cache_size - (GetCursorPtr() - GetMemPtr()); + return conf.code_cache_size - (GetCursorPtr() - GetMemPtr()); } EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.h b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.h index 24148b192c..e882a9e2e6 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.h +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.h @@ -41,25 +41,25 @@ private: template T GetMemPtr() { - static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); + static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); return reinterpret_cast(as.GetBufferPointer(0)); } template T GetMemPtr() const { - static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); + static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); return reinterpret_cast(as.GetBufferPointer(0)); } template T GetCursorPtr() { - static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); + static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); return reinterpret_cast(as.GetCursorPointer()); } template T GetCursorPtr() const { - static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); + static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); return reinterpret_cast(as.GetCursorPointer()); } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_interface.cpp index c0f65a49ed..3f395bfafb 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_interface.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -11,7 +11,6 @@ #include #include "dynarmic/common/assert.h" -#include #include "dynarmic/common/common_types.h" #include "dynarmic/backend/riscv64/a32_address_space.h" @@ -34,28 +33,18 @@ struct Jit::Impl final { HaltReason Run() { ASSERT(!jit_interface->is_executing); jit_interface->is_executing = true; - SCOPE_EXIT { - jit_interface->is_executing = false; - }; - HaltReason hr = core.Run(current_address_space, current_state, &halt_reason); - RequestCacheInvalidation(); - + jit_interface->is_executing = false; return hr; } HaltReason Step() { ASSERT(!jit_interface->is_executing); jit_interface->is_executing = true; - SCOPE_EXIT { - jit_interface->is_executing = false; - }; - UNIMPLEMENTED(); - RequestCacheInvalidation(); - + jit_interface->is_executing = false; return HaltReason{}; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_jitstate.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_jitstate.cpp index 87eeab6b0f..70cd6bf0f1 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_jitstate.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_jitstate.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,7 +8,7 @@ #include "dynarmic/backend/riscv64/a32_jitstate.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" namespace Dynarmic::Backend::RV64 { diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/code_block.h b/src/dynarmic/src/dynarmic/backend/riscv64/code_block.h index 7d53486a48..02bfa44eec 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/code_block.h +++ b/src/dynarmic/src/dynarmic/backend/riscv64/code_block.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -30,7 +30,7 @@ public: template T ptr() const noexcept { - static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); + static_assert(std::is_pointer_v || std::is_same_v || std::is_same_v); return reinterpret_cast(mem); } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64.cpp index 50cbaf9526..5ce7dee1e1 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/backend/riscv64/a32_jitstate.h" #include "dynarmic/backend/riscv64/abi.h" diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.cpp index 49489c40a4..4ab5d43db8 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -12,7 +12,6 @@ #include #include "dynarmic/common/assert.h" -#include #include "dynarmic/common/common_types.h" #include "dynarmic/common/always_false.h" @@ -164,9 +163,8 @@ u32 RegAlloc::GenerateImmediate(const IR::Value& value) { } else if constexpr (kind == HostLoc::Kind::Fpr) { UNIMPLEMENTED(); } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } - return 0; } @@ -225,7 +223,7 @@ u32 RegAlloc::RealizeReadImpl(const IR::Value& value) { fprs[new_location_index].realized = true; return new_location_index; } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } @@ -252,7 +250,7 @@ u32 RegAlloc::RealizeWriteImpl(const IR::Inst* value) { setup_location(fprs[new_location_index]); return new_location_index; } else { - static_assert(Common::always_false_v>); + UNREACHABLE(); } } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.h index e8fd471ae1..be826e63f6 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -18,7 +18,7 @@ #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" -#include +#include "dynarmic/mcl/is_instance_of_template.hpp" #include #include "dynarmic/backend/riscv64/stack_layout.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index f037919eb0..dd9e9e4a66 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -15,8 +15,7 @@ #include #include #include "dynarmic/common/assert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include @@ -60,8 +59,10 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) { UNREACHABLE(); } -A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block) - : EmitContext(reg_alloc, block), conf(conf) {} +A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels) + : EmitContext(reg_alloc, block, shared_labels) + , conf(conf) +{} A32::LocationDescriptor A32EmitContext::Location() const { return A32::LocationDescriptor{block.Location()}; @@ -110,35 +111,59 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { gprs.reset(size_t(HostLoc::R14)); return gprs; }(), any_xmm); - A32EmitContext ctx{conf, reg_alloc, block}; + + A32EmitContext ctx{conf, reg_alloc, block, shared_labels}; // Start emitting. code.align(); const u8* const entrypoint = code.getCurr(); + code.mov(code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)], rbp); + code.lea(rbp, code.ptr[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer) - 8]); EmitCondPrelude(ctx); - - for (auto iter = block.instructions.begin(); iter != block.instructions.end(); ++iter) [[likely]] { - auto* inst = &*iter; - // Call the relevant Emit* member function. - switch (inst->GetOpcode()) { -#define OPCODE(name, type, ...) \ - case IR::Opcode::name: \ - A32EmitX64::Emit##name(ctx, inst); \ - break; -#define A32OPC(name, type, ...) \ - case IR::Opcode::A32##name: \ - A32EmitX64::EmitA32##name(ctx, inst);\ - break; + typedef void (EmitX64::*EmitHandlerFn)(EmitContext& context, IR::Inst* inst); + constexpr EmitHandlerFn opcode_handlers[] = { +#define OPCODE(name, type, ...) &EmitX64::Emit##name, +#define A32OPC(name, type, ...) +#define A64OPC(name, type, ...) +#include "dynarmic/ir/opcodes.inc" +#undef OPCODE +#undef A32OPC +#undef A64OPC + }; + typedef void (A32EmitX64::*A32EmitHandlerFn)(A32EmitContext& context, IR::Inst* inst); + constexpr A32EmitHandlerFn a32_handlers[] = { +#define OPCODE(...) +#define A32OPC(name, type, ...) &A32EmitX64::EmitA32##name, #define A64OPC(...) #include "dynarmic/ir/opcodes.inc" #undef OPCODE #undef A32OPC +#undef A64OPC + }; + + for (auto& inst : block.instructions) { + auto const opcode = inst.GetOpcode(); + // Call the relevant Emit* member function. + switch (opcode) { +#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch; +#define A32OPC(name, type, ...) case IR::Opcode::A32##name: goto a32_branch; +#define A64OPC(name, type, ...) +#include "dynarmic/ir/opcodes.inc" +#undef OPCODE +#undef A32OPC #undef A64OPC default: UNREACHABLE(); } - reg_alloc.EndOfAllocScope(); +opcode_branch: + (this->*opcode_handlers[size_t(opcode)])(ctx, &inst); + goto finish_this_inst; +a32_branch: + // Update with FIRST A32 instruction + (this->*a32_handlers[size_t(opcode) - size_t(IR::Opcode::A32SetCheckBit)])(ctx, &inst); +finish_this_inst: + ctx.reg_alloc.EndOfAllocScope(); #ifndef NDEBUG if (conf.very_verbose_debugging_output) EmitVerboseDebuggingOutput(reg_alloc); @@ -147,15 +172,14 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { reg_alloc.AssertNoMoreUses(); - if (conf.enable_cycle_counting) { + if (conf.enable_cycle_counting) EmitAddCycles(block.CycleCount()); - } + code.mov(rbp, code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)]); EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); code.int3(); - for (auto& deferred_emit : ctx.deferred_emits) { + for (auto& deferred_emit : ctx.deferred_emits) deferred_emit(); - } code.int3(); const size_t size = size_t(code.getCurr() - entrypoint); @@ -168,6 +192,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { auto const bdesc = RegisterBlock(descriptor, entrypoint, size); code.DisableWriting(); + shared_labels.clear(); return bdesc; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h index 5ec78ff50e..8e97dc7737 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -29,7 +29,7 @@ namespace Dynarmic::Backend::X64 { class RegAlloc; struct A32EmitContext final : public EmitContext { - A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block); + A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels); A32::LocationDescriptor Location() const; A32::LocationDescriptor EndLocation() const; @@ -130,6 +130,7 @@ public: ankerl::unordered_dense::map, void (*)()> write_fallbacks; ankerl::unordered_dense::map, void (*)()> exclusive_write_fallbacks; ankerl::unordered_dense::set do_not_fastmem; + boost::container::stable_vector shared_labels; void (*memory_read_128)() = nullptr; // Dummy void (*memory_write_128)() = nullptr; // Dummy const void* terminal_handler_pop_rsb_hint; diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp index a6b99b545f..f138b5f137 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include "dynarmic/mcl/integer_of_size.hpp" #include "dynarmic/backend/x64/xbyak.h" #include "dynarmic/backend/x64/a32_emit_x64.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index e540839fd5..b48dcf9046 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -14,7 +14,6 @@ #include #include "dynarmic/common/assert.h" #include -#include #include "dynarmic/common/common_types.h" #include "dynarmic/common/llvm_disassemble.h" @@ -77,12 +76,7 @@ struct Jit::Impl { HaltReason Run() { ASSERT(!jit_interface->is_executing); PerformRequestedCacheInvalidation(static_cast(Atomic::Load(&jit_state.halt_reason))); - jit_interface->is_executing = true; - SCOPE_EXIT { - jit_interface->is_executing = false; - }; - const CodePtr current_codeptr = [this] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask; @@ -93,27 +87,19 @@ struct Jit::Impl { return GetCurrentBlock(); }(); - const HaltReason hr = block_of_code.RunCode(&jit_state, current_codeptr); - PerformRequestedCacheInvalidation(hr); - + jit_interface->is_executing = false; return hr; } HaltReason Step() { ASSERT(!jit_interface->is_executing); PerformRequestedCacheInvalidation(static_cast(Atomic::Load(&jit_state.halt_reason))); - jit_interface->is_executing = true; - SCOPE_EXIT { - jit_interface->is_executing = false; - }; - const HaltReason hr = block_of_code.StepCode(&jit_state, GetCurrentSingleStep()); - PerformRequestedCacheInvalidation(hr); - + jit_interface->is_executing = false; return hr; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_jitstate.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_jitstate.cpp index ed5a8f9454..066b931350 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_jitstate.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_jitstate.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -9,7 +9,7 @@ #include "dynarmic/backend/x64/a32_jitstate.h" #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/backend/x64/block_of_code.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index ff82d8b05c..8edeb29aed 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -11,9 +11,8 @@ #include #include #include "dynarmic/common/assert.h" -#include #include "dynarmic/common/common_types.h" -#include +#include "dynarmic/mcl/integer_of_size.hpp" #include #include "dynarmic/backend/x64/a64_jitstate.h" @@ -38,8 +37,10 @@ namespace Dynarmic::Backend::X64 { using namespace Xbyak::util; -A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block) - : EmitContext(reg_alloc, block), conf(conf) {} +A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels) + : EmitContext(reg_alloc, block, shared_labels) + , conf(conf) +{} A64::LocationDescriptor A64EmitContext::Location() const { return A64::LocationDescriptor{block.Location()}; @@ -84,11 +85,14 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { gprs.reset(size_t(HostLoc::R14)); return gprs; }(), any_xmm}; - A64EmitContext ctx{conf, reg_alloc, block}; + + A64EmitContext ctx{conf, reg_alloc, block, shared_labels}; // Start emitting. code.align(); const auto* const entrypoint = code.getCurr(); + code.mov(code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)], rbp); + code.lea(rbp, code.ptr[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer) - 8]); DEBUG_ASSERT(block.GetCondition() == IR::Cond::AL); typedef void (EmitX64::*EmitHandlerFn)(EmitContext& context, IR::Inst* inst); @@ -140,16 +144,13 @@ finish_this_inst: } reg_alloc.AssertNoMoreUses(); - - if (conf.enable_cycle_counting) { + if (conf.enable_cycle_counting) EmitAddCycles(block.CycleCount()); - } + code.mov(rbp, code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)]); EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); code.int3(); - - for (auto& deferred_emit : ctx.deferred_emits) { + for (auto& deferred_emit : ctx.deferred_emits) deferred_emit(); - } code.int3(); const size_t size = size_t(code.getCurr() - entrypoint); @@ -162,6 +163,7 @@ finish_this_inst: auto bdesc = RegisterBlock(descriptor, entrypoint, size); code.DisableWriting(); + shared_labels.clear(); return bdesc; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h index dd556e36ce..d57b1d81b9 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -27,7 +27,7 @@ namespace Dynarmic::Backend::X64 { struct A64EmitContext final : public EmitContext { - A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block); + A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels); A64::LocationDescriptor Location() const; bool IsSingleStep() const; @@ -126,6 +126,7 @@ public: ankerl::unordered_dense::map, void (*)()> write_fallbacks; ankerl::unordered_dense::map, void (*)()> exclusive_write_fallbacks; ankerl::unordered_dense::set do_not_fastmem; + boost::container::stable_vector shared_labels; const void* terminal_handler_pop_rsb_hint = nullptr; const void* terminal_handler_fast_dispatch_hint = nullptr; FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr; diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp index 6b3d4d86a5..4b7054b4b2 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include "dynarmic/mcl/integer_of_size.hpp" #include "dynarmic/backend/x64/xbyak.h" #include "dynarmic/backend/x64/a64_emit_x64.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index 44c63bdfc5..96440d273e 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -15,7 +15,6 @@ #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/llvm_disassemble.h" #include -#include #include "dynarmic/backend/x64/a64_emit_x64.h" #include "dynarmic/backend/x64/a64_jitstate.h" @@ -75,14 +74,8 @@ public: HaltReason Run() { ASSERT(!is_executing); PerformRequestedCacheInvalidation(static_cast(Atomic::Load(&jit_state.halt_reason))); - is_executing = true; - SCOPE_EXIT { - this->is_executing = false; - }; - // TODO: Check code alignment - const CodePtr current_code_ptr = [this] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; @@ -92,27 +85,19 @@ public: } return CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); }(); - const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); - PerformRequestedCacheInvalidation(hr); - + is_executing = false; return hr; } HaltReason Step() { ASSERT(!is_executing); PerformRequestedCacheInvalidation(static_cast(Atomic::Load(&jit_state.halt_reason))); - is_executing = true; - SCOPE_EXIT { - this->is_executing = false; - }; - const HaltReason hr = block_of_code.StepCode(&jit_state, GetCurrentSingleStep()); - PerformRequestedCacheInvalidation(hr); - + is_executing = false; return hr; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_jitstate.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_jitstate.cpp index 9f983f3955..3f04ed63f0 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_jitstate.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_jitstate.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD @@ -5,7 +8,7 @@ #include "dynarmic/backend/x64/a64_jitstate.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A64/a64_location_descriptor.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index 28c821ab59..3a161fca6b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -25,7 +25,7 @@ #include #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/backend/x64/xbyak.h" #include "dynarmic/backend/x64/a32_jitstate.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h index 5ccab7a3ed..f6c12edaaa 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h @@ -13,7 +13,7 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/backend/x64/xbyak.h" #include "dynarmic/backend/x64/abi.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/constants.h b/src/dynarmic/src/dynarmic/backend/x64/constants.h index 65c5a09a53..a0ae9f3c1e 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/constants.h +++ b/src/dynarmic/src/dynarmic/backend/x64/constants.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,7 +10,7 @@ #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/rounding_mode.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/devirtualize.h b/src/dynarmic/src/dynarmic/backend/x64/devirtualize.h index 6f22646157..422d21169f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/devirtualize.h +++ b/src/dynarmic/src/dynarmic/backend/x64/devirtualize.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -13,7 +13,7 @@ #include #include "dynarmic/common/common_types.h" -#include +#include "dynarmic/mcl/function_info.hpp" #include "dynarmic/backend/x64/callback.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 2b0540e4a7..4ed198e09f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -12,8 +12,7 @@ #include "dynarmic/common/assert.h" #include -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include @@ -33,8 +32,11 @@ namespace Dynarmic::Backend::X64 { using namespace Xbyak::util; -EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block) - : reg_alloc(reg_alloc), block(block) {} +EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels) + : reg_alloc(reg_alloc) + , block(block) + , shared_labels(shared_labels) +{} EmitContext::~EmitContext() = default; diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.h index 5de5f2dc7a..619945e19a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.h @@ -16,11 +16,12 @@ #include #include -#include #include -#include "dynarmic/backend/x64/xbyak.h" +#include #include +#include "dynarmic/backend/x64/xbyak.h" +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/backend/exception_handler.h" #include "dynarmic/backend/x64/reg_alloc.h" #include "dynarmic/common/fp/fpcr.h" @@ -52,24 +53,23 @@ using VectorArray = std::array> template using HalfVectorArray = std::array / 2>; +using SharedLabel = Xbyak::Label*; struct EmitContext { - EmitContext(RegAlloc& reg_alloc, IR::Block& block); + EmitContext(RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels); virtual ~EmitContext(); virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0; virtual bool HasOptimization(OptimizationFlag flag) const = 0; - RegAlloc& reg_alloc; - IR::Block& block; + [[nodiscard]] inline Xbyak::Label* GenSharedLabel() noexcept { + return &shared_labels.emplace_back(); + } std::vector> deferred_emits; + RegAlloc& reg_alloc; + IR::Block& block; + boost::container::stable_vector& shared_labels; }; -using SharedLabel = std::shared_ptr; - -inline SharedLabel GenSharedLabel() { - return std::make_shared(); -} - class EmitX64 { public: struct BlockDescriptor { diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 76c103ec6f..6a3ab005f3 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -11,14 +11,8 @@ #include #include "dynarmic/common/assert.h" -#include -#include -#include -#include -#include -#include #include "dynarmic/common/common_types.h" -#include +#include "dynarmic/mcl/integer_of_size.hpp" #include "dynarmic/backend/x64/xbyak.h" #include "dynarmic/backend/x64/abi.h" @@ -31,7 +25,6 @@ #include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/op.h" #include "dynarmic/common/fp/rounding_mode.h" -#include "dynarmic/common/lut_from_list.h" #include "dynarmic/interface/optimization_flags.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" @@ -42,7 +35,6 @@ namespace Dynarmic::Backend::X64 { using namespace Xbyak::util; -namespace mp = mcl::mp; namespace { @@ -144,7 +136,7 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) { template SharedLabel ProcessNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a) { - SharedLabel nan = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel nan = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); FCODE(ucomis)(a, a); code.jp(*nan, code.T_NEAR); @@ -259,7 +251,7 @@ template void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - SharedLabel end = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); @@ -312,7 +304,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); - SharedLabel end = GenSharedLabel(), nan = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel(); code.movaps(result, op1); if constexpr (std::is_member_function_pointer_v) { @@ -421,7 +413,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bo DenormalsAreZero(code, ctx, {result, operand}); - SharedLabel equal = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel equal = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); FCODE(ucomis)(result, operand); code.jz(*equal, code.T_NEAR); @@ -492,7 +484,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR:: } }; - SharedLabel end = GenSharedLabel(), z = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), z = ctx.GenSharedLabel(); FCODE(ucomis)(op1, op2); code.jz(*z, code.T_NEAR); @@ -640,7 +632,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bo } if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) { - SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel fallback = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); @@ -851,7 +843,7 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg64 tmp = do_default_nan ? INVALID_REG : ctx.reg_alloc.ScratchGpr(code); - SharedLabel end = GenSharedLabel(), nan = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel(); if (code.HasHostFeature(HostFeature::AVX)) { FCODE(vmuls)(result, op1, op2); @@ -989,7 +981,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* } if (code.HasHostFeature(HostFeature::FMA)) { - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); @@ -1137,7 +1129,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code); [[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); - SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel bad_values = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); code.movaps(value, operand); @@ -1304,7 +1296,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* } if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) { - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); @@ -1649,7 +1641,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(code); if (!unsigned_) { - SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel saturate_max = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); ZeroIfNaN<64>(code, src, scratch); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index aaed8b43f2..4fa14d504b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -3,7 +3,8 @@ * SPDX-License-Identifier: 0BSD */ -#include +#define CONCATENATE_TOKENS(x, y) CONCATENATE_TOKENS_IMPL(x, y) +#define CONCATENATE_TOKENS_IMPL(x, y) x##y #define AxxEmitX64 CONCATENATE_TOKENS(Axx, EmitX64) #define AxxEmitContext CONCATENATE_TOKENS(Axx, EmitContext) @@ -85,7 +86,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)]; - SharedLabel abort = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); if (fastmem_marker) { // Use fastmem @@ -107,7 +108,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { conf.recompile_on_fastmem_failure, }); - EmitCheckMemoryAbort(ctx, inst, end.get()); + EmitCheckMemoryAbort(ctx, inst, end); code.jmp(*end, code.T_NEAR); }); } else { @@ -119,7 +120,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { ctx.deferred_emits.emplace_back([=, this, &ctx] { code.L(*abort); code.call(wrapped_fn); - EmitCheckMemoryAbort(ctx, inst, end.get()); + EmitCheckMemoryAbort(ctx, inst, end); code.jmp(*end, code.T_NEAR); }); } @@ -172,7 +173,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)]; - SharedLabel abort = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); if (fastmem_marker) { // Use fastmem @@ -194,7 +195,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { conf.recompile_on_fastmem_failure, }); - EmitCheckMemoryAbort(ctx, inst, end.get()); + EmitCheckMemoryAbort(ctx, inst, end); code.jmp(*end, code.T_NEAR); }); } else { @@ -206,7 +207,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { ctx.deferred_emits.emplace_back([=, this, &ctx] { code.L(*abort); code.call(wrapped_fn); - EmitCheckMemoryAbort(ctx, inst, end.get()); + EmitCheckMemoryAbort(ctx, inst, end); code.jmp(*end, code.T_NEAR); }); } @@ -351,7 +352,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in const auto fastmem_marker = ShouldFastmem(ctx, inst); if (fastmem_marker) { - SharedLabel abort = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); bool require_abort_handling = false; const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling); @@ -426,7 +427,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i EmitExclusiveLock(code, conf, tmp, tmp2.cvt32()); - SharedLabel end = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(); code.mov(status, u32(1)); code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); @@ -459,7 +460,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const auto fastmem_marker = ShouldFastmem(ctx, inst); if (fastmem_marker) { - SharedLabel abort = GenSharedLabel(); + SharedLabel abort = ctx.GenSharedLabel(); bool require_abort_handling = false; const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling, tmp); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index b354efcb51..3ac078f1d7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -54,7 +54,7 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { const u32 page_align_mask = static_cast(page_table_const_size - 1) & ~align_mask; - SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel(); + SharedLabel detect_boundary = ctx.GenSharedLabel(), resume = ctx.GenSharedLabel(); code.jnz(*detect_boundary, code.T_NEAR); code.L(*resume); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp index f2abf5ef15..4c9ea821cc 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp @@ -9,9 +9,9 @@ #include #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" -#include +#include "dynarmic/mcl/integer_of_size.hpp" #include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/emit_x64.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 45add99295..6f53580997 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -13,11 +13,9 @@ #include #include "dynarmic/common/assert.h" -#include -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" -#include +#include "dynarmic/mcl/function_info.hpp" #include "dynarmic/backend/x64/xbyak.h" #include "dynarmic/backend/x64/abi.h" @@ -40,33 +38,21 @@ template static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); (code.*fn)(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(code, inst, xmm_a); } -template -static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - - (code.*fn)(xmm_a, xmm_a, xmm_b); - - ctx.reg_alloc.DefineValue(code, inst, xmm_a); -} - template static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -88,8 +74,8 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -113,9 +99,9 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 3 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -141,9 +127,9 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); const u8 arg2 = args[1].GetImmediateU8(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -168,9 +154,9 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 3 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -196,7 +182,7 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::SSE41)) { @@ -220,7 +206,7 @@ void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pextrw(dest, source, index); ctx.reg_alloc.DefineValue(code, inst, dest); @@ -236,10 +222,10 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.pextrd(dest, source, index); } else { - const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshufd(source, source, index); code.movd(dest, source); } @@ -255,7 +241,7 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { if (index == 0) { // TODO: DefineValue directly on Argument for index == 0 const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr(code).cvt64(); - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.movq(dest, source); ctx.reg_alloc.DefineValue(code, inst, dest); return; @@ -264,10 +250,10 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr(code).cvt64(); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.pextrq(dest, source, 1); } else { - const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.punpckhqdq(source, source); code.movq(dest, source); } @@ -279,7 +265,7 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg8 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt8(); @@ -312,7 +298,7 @@ void EmitX64::EmitVectorSetElement16(EmitContext& ctx, IR::Inst* inst) { ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg16 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt16(); code.pinsrw(source_vector, source_elem.cvt32(), index); @@ -324,7 +310,7 @@ void EmitX64::EmitVectorSetElement32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg32 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt32(); @@ -347,7 +333,7 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(code, args[2]); @@ -357,7 +343,7 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, source_vector); } else { const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(code, args[2]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(tmp, source_elem); @@ -371,72 +357,53 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { } } -static void VectorAbs8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsb(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pxor(temp, temp); - code.psubb(temp, data); - code.pminub(data, temp); - } -} - -static void VectorAbs16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsw(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pxor(temp, temp); - code.psubw(temp, data); - code.pmaxsw(data, temp); - } -} - -static void VectorAbs32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsd(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.movdqa(temp, data); - code.psrad(temp, 31); - code.pxor(data, temp); - code.psubd(data, temp); - } -} - -static void VectorAbs64(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - code.vpabsq(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pshufd(temp, data, 0b11110101); - code.psrad(temp, 31); - code.pxor(data, temp); - code.psubq(data, temp); - } -} - static void EmitVectorAbs(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); switch (esize) { case 8: - VectorAbs8(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsb(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubb(temp, data); + code.pminub(data, temp); + } break; case 16: - VectorAbs16(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsw(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubw(temp, data); + code.pmaxsw(data, temp); + } break; case 32: - VectorAbs32(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsd(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(temp, data); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubd(data, temp); + } break; case 64: - VectorAbs64(code, ctx, data); + if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { + code.vpabsq(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(temp, data, 0b11110101); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubq(data, temp); + } break; } - ctx.reg_alloc.DefineValue(code, inst, data); } @@ -479,15 +446,15 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorAndNot(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.pandn(xmm_b, xmm_a); ctx.reg_alloc.DefineValue(code, inst, xmm_b); } -static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) { +static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, auto const& result, u8 shift_amount) { if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = shift_amount < 8 ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8)) @@ -496,7 +463,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const return; } - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpckhbw(tmp, result); code.punpcklbw(result, result); @@ -508,7 +475,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); ArithmeticShiftRightByte(ctx, code, result, shift_amount); @@ -519,7 +486,7 @@ void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psraw(result, shift_amount); @@ -530,7 +497,7 @@ void EmitX64::EmitVectorArithmeticShiftRight16(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrad(result, shift_amount); @@ -540,14 +507,14 @@ void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = (std::min)(args[1].GetImmediateU8(), u8(63)); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { code.vpsraq(result, result, shift_amount); } else { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); const u64 sign_bit = 0x80000000'00000000u >> shift_amount; @@ -662,12 +629,12 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); code.vmovq(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); code.movq(a, a); @@ -680,7 +647,7 @@ void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(a, a, 0); @@ -689,7 +656,7 @@ void EmitX64::EmitVectorBroadcastLower16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(a, a, 0b01000100); @@ -698,11 +665,11 @@ void EmitX64::EmitVectorBroadcastLower32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); } else { @@ -715,7 +682,7 @@ void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastw(a, a); } else { @@ -727,7 +694,7 @@ void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastd(a, a); } else { @@ -738,7 +705,7 @@ void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastq(a, a); } else { @@ -749,7 +716,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 16); @@ -760,7 +727,7 @@ void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) code.vpbroadcastb(a, a); code.vmovq(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); code.movq(a, a); @@ -773,7 +740,7 @@ void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorBroadcastElementLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 8); @@ -786,7 +753,7 @@ void EmitX64::EmitVectorBroadcastElementLower16(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorBroadcastElementLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 4); @@ -802,7 +769,7 @@ void EmitX64::EmitVectorBroadcastElementLower32(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 16); @@ -812,7 +779,7 @@ void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); @@ -826,7 +793,7 @@ void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 8); @@ -846,7 +813,7 @@ void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 4); @@ -858,7 +825,7 @@ void EmitX64::EmitVectorBroadcastElement32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 2); @@ -1045,9 +1012,9 @@ void EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); @@ -1059,11 +1026,11 @@ void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); + auto const zero = ctx.reg_alloc.ScratchXmm(code); code.pxor(zero, zero); code.pblendw(lhs, zero, 0b10101010); @@ -1084,8 +1051,8 @@ void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufps(lhs, rhs, 0b10001000); @@ -1094,8 +1061,8 @@ void EmitX64::EmitVectorDeinterleaveEven32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufpd(lhs, rhs, 0b00); @@ -1104,16 +1071,16 @@ void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklbw(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0D'09'05'01'0C'08'04'00, 0x8080808080808080)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); @@ -1128,15 +1095,15 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklwd(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0B0A'0302'0908'0100, 0x8080'8080'8080'8080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.pslld(lhs, 16); code.psrad(lhs, 16); @@ -1154,8 +1121,8 @@ void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { // copy bytes 0:3 of rhs to lhs, zero out upper 8 bytes @@ -1170,8 +1137,8 @@ void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorDeinterleaveOdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psraw(lhs, 8); code.psraw(rhs, 8); @@ -1182,8 +1149,8 @@ void EmitX64::EmitVectorDeinterleaveOdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrad(lhs, 16); code.psrad(rhs, 16); @@ -1194,8 +1161,8 @@ void EmitX64::EmitVectorDeinterleaveOdd16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufps(lhs, rhs, 0b11011101); @@ -1204,8 +1171,8 @@ void EmitX64::EmitVectorDeinterleaveOdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufpd(lhs, rhs, 0b11); @@ -1214,15 +1181,15 @@ void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklbw(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0F'0B'07'03'0E'0A'06'02, 0x8080808080808080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psraw(lhs, 8); code.psraw(rhs, 8); @@ -1236,15 +1203,15 @@ void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklwd(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0F0E'0706'0D0C'0504, 0x8080'8080'8080'8080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrad(lhs, 16); code.psrad(rhs, 16); @@ -1260,17 +1227,17 @@ void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst) auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); // copy bytes 4:7 of lhs to bytes 0:3 of rhs, zero out upper 8 bytes code.insertps(rhs, lhs, 0b01001100); ctx.reg_alloc.DefineValue(code, inst, rhs); } else { - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const zero = ctx.reg_alloc.ScratchXmm(code); code.xorps(zero, zero); code.unpcklps(lhs, rhs); @@ -1304,9 +1271,9 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b10110001); @@ -1319,9 +1286,9 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqq(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b01001110); @@ -1329,9 +1296,9 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b10110001); @@ -1355,16 +1322,16 @@ void EmitX64::EmitVectorExtract(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.palignr(xmm_b, xmm_a, position / 8); ctx.reg_alloc.DefineValue(code, inst, xmm_b); return; } - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrldq(xmm_a, position / 8); code.pslldq(xmm_b, (128 - position) / 8); @@ -1376,13 +1343,13 @@ void EmitX64::EmitVectorExtract(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorExtractLower(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 position = args[2].GetImmediateU8(); ASSERT(position % 8 == 0); if (position != 0) { - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklqdq(xmm_a, xmm_b); code.psrldq(xmm_a, position / 8); @@ -1407,22 +1374,33 @@ void EmitX64::EmitVectorGreaterS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorGreaterS64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE42)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpgtq); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x80000000, 0x80000000)); + code.pxor(tmp0, tmp2); + code.pxor(tmp1, tmp2); + code.movdqa(tmp2, tmp0); + code.pcmpeqd(tmp0, tmp1); + code.pcmpgtd(tmp2, tmp1); + code.pshufd(tmp1, tmp0, 245); + code.pshufd(tmp3, tmp2, 160); + code.pshufd(tmp0, tmp2, 245); + code.pand(tmp1, tmp3); + code.por(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - for (size_t i = 0; i < result.size(); ++i) { - result[i] = (a[i] > b[i]) ? ~u64(0) : 0; - } - }); } static void EmitVectorHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, b); code.pand(tmp, a); @@ -1461,9 +1439,9 @@ void EmitX64::EmitVectorHalvingAddS32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, b); @@ -1506,12 +1484,12 @@ void EmitX64::EmitVectorHalvingAddU32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x8080808080808080, 0x8080808080808080)); code.pxor(a, tmp); code.pxor(b, tmp); @@ -1520,7 +1498,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* break; } case 16: { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x8000800080008000, 0x8000800080008000)); code.pxor(a, tmp); code.pxor(b, tmp); @@ -1554,8 +1532,8 @@ void EmitX64::EmitVectorHalvingSubS32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingSubUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: @@ -1592,8 +1570,8 @@ void EmitX64::EmitVectorHalvingSubU32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); switch (size) { case 8: @@ -1632,8 +1610,8 @@ void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorInterleaveUpper(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); switch (size) { case 8: @@ -1672,7 +1650,7 @@ void EmitX64::EmitVectorInterleaveUpper64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); if (shift_amount == 0) { @@ -1698,7 +1676,7 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psllw(result, shift_amount); @@ -1709,7 +1687,7 @@ void EmitX64::EmitVectorLogicalShiftLeft16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.pslld(result, shift_amount); @@ -1720,7 +1698,7 @@ void EmitX64::EmitVectorLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psllq(result, shift_amount); @@ -1731,7 +1709,7 @@ void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); if (shift_amount == 0) { @@ -1755,7 +1733,7 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrlw(result, shift_amount); @@ -1766,7 +1744,7 @@ void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrld(result, shift_amount); @@ -1777,7 +1755,7 @@ void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrlq(result, shift_amount); @@ -1785,41 +1763,12 @@ void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, result); } -template -static void EmitVectorLogicalVShiftAVX2(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - static_assert(esize == 32 || esize == 64); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - - // store sign bit of lowest byte of each element of b to select left/right shift later - ICODE(vpsll)(xmm0, b, u8(esize - 8)); - - // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) - code.vpabsb(b, b); - code.vpand(b, b, code.BConst(xword, 0xFF)); - - // calculate shifts - ICODE(vpsllv)(result, a, b); - ICODE(vpsrlv)(a, a, b); - - // implicit argument: xmm0 (sign of lowest byte of b) - if (esize == 32) { - code.blendvps(result, a); - } else { - code.blendvpd(result, a); - } - ctx.reg_alloc.DefineValue(code, inst, result); -} - void EmitX64::EmitVectorLogicalVShift8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::GFNI)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Opmask negative_mask = k1; code.pxor(tmp, tmp); @@ -1864,10 +1813,10 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const right_shift = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -1888,18 +1837,87 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX2)) { - EmitVectorLogicalVShiftAVX2<32>(code, ctx, inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); + // store sign bit of lowest byte of each element of b to select left/right shift later + code.vpslld(mask, b, u8(32 - 8)); + // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) + code.vpabsb(b, b); + code.vpand(b, b, code.BConst<32>(xword, 0xFF)); + // calculate shifts + code.vpsllvd(result, a, b); + code.vpsrlvd(a, a, b); + code.vblendvps(result, result, a, mask); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp6 = ctx.reg_alloc.ScratchXmm(code); + code.pxor(tmp3, tmp3); + code.movdqa(tmp2, tmp0); + code.psubb(tmp3, tmp1); + code.movdqa(tmp4, tmp2); + code.movdqa(tmp6, tmp2); + code.pminub(tmp3, tmp1); + code.pslld(tmp1, 24); + code.pand(tmp3, code.Const(xword, 0x000000ff'000000ff, 0x000000ff'000000ff)); + code.psrad(tmp1, 31); + code.pshuflw(tmp0, tmp3, 254); + code.pshuflw(tmp5, tmp3, 84); + code.psrld(tmp4, tmp0); + code.movdqa(tmp0, tmp2); + code.psrld(tmp0, tmp5); + code.punpcklqdq(tmp0, tmp4); + code.pshufd(tmp4, tmp3, 238); + code.pslld(tmp3, 23); + code.paddd(tmp3, code.Const(xword, 0x3F80'00003F80'0000, 0x3F80'00003F80'0000)); + code.pshuflw(tmp5, tmp4, 254); + code.pshuflw(tmp4, tmp4, 84); + code.psrld(tmp6, tmp5); + code.movdqa(tmp5, tmp2); + code.psrld(tmp5, tmp4); + code.pshufd(tmp4, tmp2, 245); + code.punpckhqdq(tmp5, tmp6); + code.cvttps2dq(tmp3, tmp3); + code.shufps(tmp0, tmp5, 204); + code.pmuludq(tmp2, tmp3); + code.pshufd(tmp3, tmp3, 245); + code.andps(tmp0, tmp1); + code.pmuludq(tmp3, tmp4); + code.pshufd(tmp2, tmp2, 232); + code.pshufd(tmp3, tmp3, 232); + code.punpckldq(tmp2, tmp3); + code.pandn(tmp1, tmp2); + code.orps(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX2)) { - EmitVectorLogicalVShiftAVX2<64>(code, ctx, inst); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); + // store sign bit of lowest byte of each element of b to select left/right shift later + code.vpsllq(mask, b, u8(64 - 8)); + // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) + code.vpabsb(b, b); + code.vpand(b, b, code.BConst<64>(xword, 0xFF)); + // calculate shifts + code.vpsllvq(result, a, b); + code.vpsrlvq(a, a, b); + code.vblendvpd(result, result, a, mask); + ctx.reg_alloc.DefineValue(code, inst, result); } else { EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); @@ -1914,28 +1932,11 @@ enum class MinMaxOperation { Max, }; -// Compute the minimum/maximum of two vectors of signed 8-bit integers, using only SSE2 instructons. -// The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxS8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - if(op == MinMaxOperation::Min) { - code.movdqa(c, b); - code.pcmpgtb(c, a); - } else { - code.movdqa(c, a); - code.pcmpgtb(c, b); - } - - code.pand(a, c); - code.pandn(c, b); - code.por(a, c); -} - // Compute the minimum/maximum of two vectors of unsigned 16-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { +void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { if(op == MinMaxOperation::Min) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psubusw(c, b); code.psubw(a, c); @@ -1947,8 +1948,8 @@ void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of signed 32-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); +void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { + auto const c = ctx.reg_alloc.ScratchXmm(code); if(op == MinMaxOperation::Min) { code.movdqa(c, b); code.pcmpgtd(c, a); @@ -1964,12 +1965,12 @@ void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of unsigned 32-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); +void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, code.BConst<32>(xword, 0x80000000)); // bias a and b by XORing their sign bits, then use the signed comparison function - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); if(op == MinMaxOperation::Min) { code.movdqa(d, a); code.pxor(d, c); @@ -1991,11 +1992,16 @@ void EmitX64::EmitVectorMaxS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS8(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, a); + code.pcmpgtb(c, b); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2007,31 +2013,55 @@ void EmitX64::EmitVectorMaxS32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS32(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); + code.pandn(tmp2, tmp1); + code.por(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmaxsq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmaxsq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); code.vpcmpgtq(xmm0, y, x); code.pblendvb(x, y); - ctx.reg_alloc.DefineValue(code, inst, x); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'0000, 0x8000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp2, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp3, tmp4, 245); + code.pand(tmp2, tmp5); + code.por(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } @@ -2043,11 +2073,11 @@ void EmitX64::EmitVectorMaxU16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU16(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2055,35 +2085,54 @@ void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxud); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU32(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmaxuq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmaxuq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, code.Const(xword, 0x8000000000000000, 0x8000000000000000)); code.vpsubq(tmp, y, xmm0); code.vpsubq(xmm0, x, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0); code.pblendvb(x, y); - ctx.reg_alloc.DefineValue(code, inst, x); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp2, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp3, tmp4, 245); + code.pand(tmp2, tmp5); + code.por(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } @@ -2091,11 +2140,16 @@ void EmitX64::EmitVectorMinS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS8(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2107,31 +2161,51 @@ void EmitX64::EmitVectorMinS32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsd); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxS32(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpminsq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpminsq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.vpcmpgtq(xmm0, y, x); code.pblendvb(y, x); - ctx.reg_alloc.DefineValue(code, inst, y); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'0000, 0x8000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp3, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp2, tmp4, 245); + code.pand(tmp3, tmp5); + code.por(tmp2, tmp3); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + //code.movdqa(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); } } @@ -2143,11 +2217,11 @@ void EmitX64::EmitVectorMinU16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminuw); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU16(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2155,57 +2229,93 @@ void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminud); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU32(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpminuq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpminuq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, code.Const(xword, 0x8000000000000000, 0x8000000000000000)); code.vpsubq(tmp, y, xmm0); code.vpsubq(xmm0, x, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0); code.pblendvb(y, x); - ctx.reg_alloc.DefineValue(code, inst, y); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp3, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp2, tmp4, 245); + code.pand(tmp3, tmp5); + code.por(tmp2, tmp3); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + //code.movdqa(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); } } void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(code); - - // TODO: Optimize - code.movdqa(tmp_a, a); - code.movdqa(tmp_b, b); - code.pmullw(a, b); - code.psrlw(tmp_a, 8); - code.psrlw(tmp_b, 8); - code.pmullw(tmp_a, tmp_b); - code.pand(a, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); - code.psllw(tmp_a, 8); - code.por(a, tmp_a); - - ctx.reg_alloc.DefineValue(code, inst, a); + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.vbroadcastss(tmp3, code.Const(dword, 0x00ff'00ff)); + code.vpmullw(tmp2, tmp1, tmp0); + code.vpandn(tmp0, tmp3, tmp0); + code.vpand(tmp2, tmp2, tmp3); + code.vpmaddubsw(tmp0, tmp1, tmp0); + code.vpsllw(tmp0, tmp0, 8); + code.vpor(tmp0, tmp2, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, tmp0); + code.movdqa(tmp3, tmp1); + code.movdqa(tmp4, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.punpckhbw(tmp2, tmp2); + code.punpckhbw(tmp3, tmp3); + code.punpcklbw(tmp0, tmp0); + code.punpcklbw(tmp1, tmp1); + code.pmullw(tmp3, tmp2); + code.pmullw(tmp0, tmp1); + code.pand(tmp3, tmp4); + code.pand(tmp0, tmp4); + code.packuswb(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } } void EmitX64::EmitVectorMultiply16(EmitContext& ctx, IR::Inst* inst) { @@ -2216,31 +2326,32 @@ void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmulld); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - code.movdqa(tmp, a); - code.psrlq(a, 32); - code.pmuludq(tmp, b); - code.psrlq(b, 32); - code.pmuludq(a, b); - code.pshufd(tmp, tmp, 0b00001000); - code.pshufd(b, a, 0b00001000); - code.punpckldq(tmp, b); - - ctx.reg_alloc.DefineValue(code, inst, tmp); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp, a); + code.psrlq(a, 32); + code.pmuludq(tmp, b); + code.psrlq(b, 32); + code.pmuludq(a, b); + code.pshufd(tmp, tmp, 0b00001000); + code.pshufd(b, a, 0b00001000); + code.punpckldq(tmp, b); + ctx.reg_alloc.DefineValue(code, inst, tmp); } } void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) { - if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmullq); - } else if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) { + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmullq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); + } else if (code.HasHostFeature(HostFeature::SSE41)) { + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); const Xbyak::Reg64 tmp1 = ctx.reg_alloc.ScratchGpr(code); const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code); @@ -2255,29 +2366,28 @@ void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, a); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); - code.movdqa(tmp1, a); - code.movdqa(tmp2, a); - code.movdqa(tmp3, b); + code.movdqa(tmp1, a); + code.movdqa(tmp2, a); + code.movdqa(tmp3, b); - code.psrlq(tmp1, 32); - code.psrlq(tmp3, 32); + code.psrlq(tmp1, 32); + code.psrlq(tmp3, 32); - code.pmuludq(tmp2, b); - code.pmuludq(tmp3, a); - code.pmuludq(b, tmp1); + code.pmuludq(tmp2, b); + code.pmuludq(tmp3, a); + code.pmuludq(b, tmp1); - code.paddq(b, tmp3); - code.psllq(b, 32); - code.paddq(tmp2, b); + code.paddq(b, tmp3); + code.psllq(b, 32); + code.paddq(tmp2, b); - ctx.reg_alloc.DefineValue(code, inst, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); } } @@ -2309,15 +2419,15 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmovwb(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.pand(a, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); @@ -2330,13 +2440,13 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmovdw(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); if (code.HasHostFeature(HostFeature::SSE41)) { code.pblendw(a, zeros, 0b10101010); @@ -2354,15 +2464,15 @@ void EmitX64::EmitVectorNarrow64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmovqd(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.shufps(a, zeros, 0b00001000); @@ -2375,13 +2485,13 @@ void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const operand = ctx.reg_alloc.UseXmm(code, args[0]); code.vpternlogq(result, operand, operand, u8(~Tern::c)); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqw(xmm_b, xmm_b); code.pxor(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(code, inst, xmm_a); @@ -2395,9 +2505,9 @@ void EmitX64::EmitVectorOr(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); code.movdqa(tmp, xmm_a); @@ -2413,9 +2523,9 @@ void EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); if (code.HasHostFeature(HostFeature::SSSE3)) { @@ -2436,9 +2546,9 @@ void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); if (code.HasHostFeature(HostFeature::SSSE3)) { @@ -2458,10 +2568,10 @@ void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.movdqa(d, b); @@ -2480,17 +2590,17 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); code.phaddw(a, b); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.movdqa(d, b); @@ -2510,17 +2620,17 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); code.phaddd(a, b); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.movdqa(d, b); @@ -2537,9 +2647,9 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.punpcklqdq(a, b); @@ -2552,8 +2662,8 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllw(a, 8); @@ -2567,8 +2677,8 @@ void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.pslld(a, 16); @@ -2582,18 +2692,18 @@ void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); code.vpaddq(a, a, c); } else { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllq(a, 32); @@ -2615,8 +2725,8 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllw(a, 8); @@ -2630,8 +2740,8 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.pslld(a, 16); @@ -2645,8 +2755,8 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* ins void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllq(a, 32); @@ -2660,14 +2770,10 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins template static void PairedOperation(VectorArray& result, const VectorArray& x, const VectorArray& y, Function fn) { const size_t range = x.size() / 2; - - for (size_t i = 0; i < range; i++) { + for (size_t i = 0; i < range; i++) result[i] = fn(x[2 * i], x[2 * i + 1]); - } - - for (size_t i = 0; i < range; i++) { + for (size_t i = 0; i < range; i++) result[range + i] = fn(y[2 * i], y[2 * i + 1]); - } } template @@ -2688,11 +2794,6 @@ static void PairedMax(VectorArray& result, const VectorArray& x, const Vec PairedOperation(result, x, y, [](auto a, auto b) { return (std::max)(a, b); }); } -template -static void PairedMin(VectorArray& result, const VectorArray& x, const VectorArray& y) { - PairedOperation(result, x, y, [](auto a, auto b) { return (std::min)(a, b); }); -} - template static void LowerPairedMax(VectorArray& result, const VectorArray& x, const VectorArray& y) { LowerPairedOperation(result, x, y, [](auto a, auto b) { return (std::max)(a, b); }); @@ -2707,19 +2808,16 @@ template static void EmitVectorPairedMinMax8(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); code.pshufb(x, tmp); code.pshufb(y, tmp); - code.movaps(tmp, x); code.shufps(tmp, y, 0b01'00'01'00); - code.shufps(x, y, 0b11'10'11'10); - if constexpr (std::is_member_function_pointer_v) { (code.*fn)(x, tmp); } else { @@ -2732,21 +2830,17 @@ static void EmitVectorPairedMinMax8(BlockOfCode& code, EmitContext& ctx, IR::Ins template static void EmitVectorPairedMinMaxLower8(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.punpcklqdq(x, y); code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); code.movhlps(y, x); code.movq(x, x); - if constexpr (std::is_member_function_pointer_v) { (code.*fn)(x, y); } else { fn(x, y); } - ctx.reg_alloc.DefineValue(code, inst, x); } @@ -2754,9 +2848,9 @@ template static void EmitVectorPairedMinMax16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); // swap idxs 1 and 2 within 64-bit lanes so that both registers contain [even, odd, even, odd]-indexed pairs of elements code.pshuflw(x, x, 0b11'01'10'00); @@ -2782,63 +2876,31 @@ static void EmitVectorPairedMinMax16(BlockOfCode& code, EmitContext& ctx, IR::In ctx.reg_alloc.DefineValue(code, inst, x); } -template -static void EmitVectorPairedMinMaxLower16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements - code.pshuflw(x, x, 0b11'01'10'00); - code.pshuflw(y, y, 0b11'01'10'00); - - // move pairs of even/odd-indexed elements into one register each - - // tmp = x[0, 2], y[0, 2], 0s... - code.movaps(tmp, y); - code.insertps(tmp, x, 0b01001100); - // x = x[1, 3], y[1, 3], 0s... - code.insertps(x, y, 0b00011100); - - (code.*fn)(x, tmp); - - ctx.reg_alloc.DefineValue(code, inst, x); -} - -static void EmitVectorPairedMinMaxLower32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - // tmp = x[1], y[1], 0, 0 - code.movaps(tmp, y); - code.insertps(tmp, x, 0b01001100); - // x = x[0], y[0], 0, 0 - code.insertps(x, y, 0b00011100); - - (code.*fn)(x, tmp); - - ctx.reg_alloc.DefineValue(code, inst, x); -} void EmitX64::EmitVectorPairedMaxS8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.pshufb(x, tmp); + code.pshufb(y, tmp); + code.movaps(tmp, x); + code.shufps(tmp, y, 0b01'00'01'00); + code.shufps(x, y, 0b11'10'11'10); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); - return; - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Max); - }); - return; + code.pmaxsb(x, tmp); + } else { + auto const a = x; + auto const b = tmp; + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, a); + code.pcmpgtb(c, b); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMax(result, a, b); - }); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) { @@ -2848,9 +2910,9 @@ void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2868,12 +2930,24 @@ void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxU8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxub); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const constant_00ff = code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF); + code.movdqa(tmp2, constant_00ff); + code.movdqa(tmp3, tmp1); + code.pand(tmp3, tmp2); + code.pand(tmp2, tmp0); + code.packuswb(tmp2, tmp3); + code.psrlw(tmp1, 8); + code.psrlw(tmp0, 8); + code.packuswb(tmp0, tmp1); + code.pmaxub(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) { @@ -2889,9 +2963,9 @@ void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2909,14 +2983,15 @@ void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Min); - }); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMin(result, a, b); - }); + EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& a, const auto& b) { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); + }); } } @@ -2927,9 +3002,9 @@ void EmitX64::EmitVectorPairedMinS16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2945,12 +3020,25 @@ void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinU8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pminub); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const constant_00ff = code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF); + code.movdqa(tmp2, tmp1); + code.psrlw(tmp2, 8); + code.movdqa(tmp3, tmp0); + code.psrlw(tmp3, 8); + code.packuswb(tmp3, tmp2); + code.movdqa(tmp2, constant_00ff); + code.pand(tmp1, tmp2); + code.pand(tmp0, tmp2); + code.packuswb(tmp0, tmp1); + code.pminub(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) { @@ -2966,9 +3054,9 @@ void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2984,41 +3072,88 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMaxLowerS8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); - return; - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Max); - }); - return; + code.punpcklqdq(x, y); + code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.movhlps(y, x); + code.movq(x, x); + code.pmaxsb(x, y); + } else { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.punpcklqdq(x, y); + code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.movhlps(y, x); + code.movq(x, x); + code.movdqa(c, x); + code.pcmpgtb(c, y); + code.pand(x, c); + code.pandn(c, y); + code.por(x, c); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitVectorPairedMaxLowerS16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... + code.insertps(x, y, 0b00011100); + code.pmaxsw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 232); + code.pshuflw(tmp1, tmp1, 216); + code.pshufd(tmp0, tmp0, 231); + code.pshuflw(tmp0, tmp0, 114); + code.pmaxsw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerS32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pmaxsd(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); + code.pandn(tmp2, tmp1); + code.por(tmp2, tmp0); + code.movq(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerU8(EmitContext& ctx, IR::Inst* inst) { @@ -3033,63 +3168,143 @@ void EmitX64::EmitVectorPairedMaxLowerU8(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMaxLowerU16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... + code.insertps(x, y, 0b00011100); + code.pmaxuw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 232); + code.pshuflw(tmp1, tmp1, 216); + code.pshufd(tmp0, tmp0, 231); + code.pshuflw(tmp0, tmp0, 114); + code.psubusw(tmp0, tmp1); + code.paddw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerU32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pmaxud); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pmaxud(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp0); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp1); + code.pcmpgtd(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp3, tmp0); + code.movq(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorPairedMinMaxLower8(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); - return; - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Min); + } else { + EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& a, const auto& b) { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); }); - return; } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pminsw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... + code.insertps(x, y, 0b00011100); + code.pminsw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 231); + code.pshuflw(tmp1, tmp1, 114); + code.pshufd(tmp0, tmp0, 232); + code.pshuflw(tmp0, tmp0, 216); + code.pminsw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pminsd); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pminsd(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + code.movq(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerU8(EmitContext& ctx, IR::Inst* inst) { @@ -3104,50 +3319,91 @@ void EmitX64::EmitVectorPairedMinLowerU8(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMinLowerU16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pminuw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... + code.insertps(x, y, 0b00011100); + code.pminuw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 231); + code.pshuflw(tmp1, tmp1, 114); + code.pshufd(tmp0, tmp0, 232); + code.pshuflw(tmp0, tmp0, 216); + code.movdqa(tmp2, tmp1); + code.psubusw(tmp2, tmp0); + code.psubw(tmp1, tmp2); + code.movq(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerU32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pminud); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pminud(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp0); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp1); + code.pcmpgtd(tmp3, tmp2); + code.pand(tmp1, tmp3); + code.pandn(tmp3, tmp0); + code.por(tmp3, tmp1); + code.movq(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } template static D PolynomialMultiply(T lhs, T rhs) { constexpr size_t bit_size = mcl::bitsizeof; const std::bitset operand(lhs); - D res = 0; - for (size_t i = 0; i < bit_size; i++) { - if (operand[i]) { + for (size_t i = 0; i < bit_size; i++) + if (operand[i]) res ^= rhs << i; - } - } - return res; } void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const alternate = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr(code).cvt32(); Xbyak::Label loop; @@ -3185,11 +3441,11 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const alternate = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr(code).cvt32(); Xbyak::Label loop; @@ -3231,8 +3487,8 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorPolynomialMultiplyLong64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::PCLMULQDQ)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.pclmulqdq(xmm_a, xmm_b, 0x00); @@ -3262,7 +3518,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong64(EmitContext& ctx, IR::Inst* ins void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::AVX512BITALG)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpopcntb(data, data); @@ -3273,10 +3529,10 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm low_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm high_a = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const low_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const high_a = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_a, low_a); code.psrlw(high_a, 4); @@ -3305,12 +3561,12 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::GFNI)) { code.gf2p8affineqb(data, code.Const(xword, 0x8040201008040201, 0x8040201008040201), 0); } else { - const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm(code); + auto const high_nibble_reg = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_nibble_reg, code.Const(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.pand(high_nibble_reg, data); code.pxor(data, high_nibble_reg); @@ -3318,7 +3574,7 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { // High lookup - const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm(code); + auto const high_reversed_reg = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_reversed_reg, code.Const(xword, 0xE060A020C0408000, 0xF070B030D0509010)); code.pshufb(high_reversed_reg, data); @@ -3352,8 +3608,8 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); @@ -3365,13 +3621,13 @@ void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpshufb(data, data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b)); } else if (code.HasHostFeature(HostFeature::SSSE3)) { code.pshufb(data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); code.psrlw(data, 8); @@ -3384,7 +3640,7 @@ void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b10110001); code.pshufhw(data, data, 0b10110001); ctx.reg_alloc.DefineValue(code, inst, data); @@ -3392,13 +3648,13 @@ void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpshufb(data, data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); } else if (code.HasHostFeature(HostFeature::SSSE3)) { code.pshufb(data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); code.psrlw(data, 8); @@ -3412,7 +3668,7 @@ void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b00011011); code.pshufhw(data, data, 0b00011011); @@ -3423,7 +3679,7 @@ void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b01001110); code.pshufhw(data, data, 0b01001110); @@ -3434,8 +3690,8 @@ void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements code.pshufd(temp, data, 0b01'00'11'10); @@ -3455,8 +3711,8 @@ void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; if (code.HasHostFeature(HostFeature::SSSE3)) { code.pxor(temp, temp); @@ -3486,8 +3742,8 @@ void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements(reversed) code.pshufd(temp, data, 0b00'01'10'11); @@ -3510,8 +3766,8 @@ void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements code.pshufd(temp, data, 0b01'00'11'10); @@ -3526,8 +3782,8 @@ void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const operand = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); const u8 shift_amount = args[1].GetImmediateU8(); ASSERT(shift_amount % 32 == 0); const u8 shuffle_imm = std::rotr(0b11100100, shift_amount / 32 * 2); @@ -3540,12 +3796,12 @@ void EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: { - const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm(code); + auto const vec_128 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(vec_128, code.Const(xword, 0x8080808080808080, 0x8080808080808080)); code.paddb(a, vec_128); @@ -3555,7 +3811,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I break; } case 16: { - const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm(code); + auto const vec_32768 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(vec_32768, code.Const(xword, 0x8000800080008000, 0x8000800080008000)); code.paddw(a, vec_32768); @@ -3565,7 +3821,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I break; } case 32: { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp1, a); code.por(a, b); @@ -3605,9 +3861,9 @@ static void EmitVectorRoundingHalvingAddUnsigned(size_t esize, EmitContext& ctx, case 32: { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp1, a); @@ -3671,18 +3927,18 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I static_assert(esize == 32 || esize == 64); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); // positive values of b are left shifts, while negative values are (positive) rounding right shifts // only the lowest byte of each element is read as the shift amount // conveniently, the behavior of bit shifts greater than element width is the same in NEON and SSE/AVX - filled with zeros - const Xbyak::Xmm shift_amount = ctx.reg_alloc.ScratchXmm(code); + auto const shift_amount = ctx.reg_alloc.ScratchXmm(code); code.vpabsb(shift_amount, b); code.vpand(shift_amount, shift_amount, code.BConst(xword, 0xFF)); // if b is positive, do a normal left shift - const Xbyak::Xmm left_shift = ctx.reg_alloc.ScratchXmm(code); + auto const left_shift = ctx.reg_alloc.ScratchXmm(code); ICODE(vpsllv)(left_shift, a, shift_amount); // if b is negative, compute the rounding right shift @@ -3693,7 +3949,7 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I // tmp = (a >> (b - 1)) & 1 // res = (a >> b) + tmp // to add the value of the last bit to be shifted off to the result of the right shift - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); + auto const right_shift = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, code.BConst(xword, 1)); // find value of last bit to be shifted off @@ -3777,12 +4033,12 @@ void EmitX64::EmitVectorRoundingShiftLeftU64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pmovsxbw(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.pxor(result, result); code.punpcklbw(result, a); code.psraw(result, 8); @@ -3793,12 +4049,12 @@ void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pmovsxwd(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.pxor(result, result); code.punpcklwd(result, a); code.psrad(result, 16); @@ -3808,12 +4064,12 @@ void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovsxdq(a, a); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movaps(tmp, a); code.psrad(tmp, 31); @@ -3826,7 +4082,7 @@ void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg64 gpr_tmp = ctx.reg_alloc.ScratchGpr(code); code.movq(gpr_tmp, data); @@ -3835,7 +4091,7 @@ void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { code.pinsrq(data, gpr_tmp, 1); } else { - const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(xmm_tmp, gpr_tmp); code.punpcklqdq(data, xmm_tmp); @@ -3846,9 +4102,9 @@ void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); // only signed 16-bit min/max are available below SSE4.1 if (code.HasHostFeature(HostFeature::SSE41) || esize == 16) { @@ -3914,11 +4170,11 @@ void EmitX64::EmitVectorSignedMultiply16(EmitContext& ctx, IR::Inst* inst) { const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); if (upper_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhw(result, x, y); } else { @@ -3930,7 +4186,7 @@ void EmitX64::EmitVectorSignedMultiply16(EmitContext& ctx, IR::Inst* inst) { } if (lower_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmullw(result, x, y); } else { @@ -3948,9 +4204,9 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (lower_inst && !upper_inst && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(result, x, y); @@ -3959,16 +4215,16 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(lower_result, x, y); ctx.reg_alloc.DefineValue(code, lower_inst, lower_result); } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmuldq(result, x, y); code.vpsrlq(x, x, 32); @@ -3980,12 +4236,12 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { return; } - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const sign_correction = ctx.reg_alloc.ScratchXmm(code); + auto const upper_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); // calculate sign correction code.movdqa(tmp, x); @@ -4028,7 +4284,7 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); // SSE absolute value functions return an unsigned result @@ -4040,21 +4296,34 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo // or shift in sign bits to create a mask of (msb == 1 ? -1 : 0), then add to the result vector switch (esize) { case 8: { - VectorAbs8(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsb(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubb(temp, data); + code.pminub(data, temp); + } code.pmovmskb(bit, data); - code.pminub(data, code.BConst<8>(xword, 0x7F)); break; } case 16: { - VectorAbs16(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsw(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubw(temp, data); + code.pmaxsw(data, temp); + } code.pmovmskb(bit, data); code.and_(bit, 0xAAAA); // toggle mask bits that aren't the msb of an int16 to 0 if (code.HasHostFeature(HostFeature::SSE41)) { code.pminuw(data, code.BConst<16>(xword, 0x7FFF)); } else { - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; code.movdqa(tmp, data); code.psraw(data, 15); code.paddw(data, tmp); @@ -4062,13 +4331,21 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo break; } case 32: { - VectorAbs32(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsd(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(temp, data); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubd(data, temp); + } code.movmskps(bit, data); if (code.HasHostFeature(HostFeature::SSE41)) { code.pminud(data, code.BConst<32>(xword, 0x7FFFFFFF)); } else { - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; code.movdqa(tmp, data); code.psrad(data, 31); code.paddd(data, tmp); @@ -4076,10 +4353,18 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo break; } case 64: { - VectorAbs64(code, ctx, data); + if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { + code.vpabsq(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(temp, data, 0b11110101); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubq(data, temp); + } code.movmskpd(bit, data); - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; if (code.HasHostFeature(HostFeature::SSE42)) { // create a -1 mask if msb is set code.pxor(tmp, tmp); @@ -4121,13 +4406,13 @@ template static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); code.movdqa(xmm0, y); ctx.reg_alloc.Release(y); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); switch (bit_width) { case 8: @@ -4184,7 +4469,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC switch (bit_width) { case 8: if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqb(tmp2, tmp2); code.pxor(tmp, tmp); code.vpblendvb(xmm0, tmp, tmp2, xmm0); @@ -4264,10 +4549,10 @@ void EmitX64::EmitVectorSignedSaturatedAccumulateUnsigned64(EmitContext& ctx, IR template static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm upper_tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm lower_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const upper_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const lower_tmp = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhw(upper_tmp, x, y); @@ -4286,7 +4571,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC ctx.reg_alloc.Release(x); ctx.reg_alloc.Release(y); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { if constexpr (is_rounding) { @@ -4336,10 +4621,10 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm odds = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm even = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const odds = ctx.reg_alloc.ScratchXmm(code); + auto const even = ctx.reg_alloc.ScratchXmm(code); code.vpmuldq(odds, x, y); code.vpsrlq(x, x, 32); @@ -4352,7 +4637,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpaddq(odds, odds, odds); code.vpaddq(even, even, even); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if constexpr (is_rounding) { code.vmovdqa(result, code.Const(xword, 0x0000000080000000, 0x0000000080000000)); @@ -4363,7 +4648,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpsrlq(result, odds, 32); code.vblendps(result, result, even, 0b1010); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); @@ -4378,11 +4663,11 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& return; } - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const sign_correction = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); // calculate sign correction code.movdqa(tmp, x); @@ -4441,8 +4726,8 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHighRounding32(EmitContex void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.punpcklwd(x, x); code.punpcklwd(y, y); @@ -4467,8 +4752,8 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmovsxdq(x, x); @@ -4519,10 +4804,10 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign = ctx.reg_alloc.ScratchXmm(code); + auto const src = ctx.reg_alloc.UseXmm(code, args[0]); + auto const dest = ctx.reg_alloc.ScratchXmm(code); + auto const reconstructed = ctx.reg_alloc.ScratchXmm(code); + auto const sign = ctx.reg_alloc.ScratchXmm(code); code.movdqa(dest, src); code.pxor(xmm0, xmm0); @@ -4579,9 +4864,9 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToSigned64(EmitContext& ctx, IR::In static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(code); + auto const src = ctx.reg_alloc.UseXmm(code, args[0]); + auto const dest = ctx.reg_alloc.ScratchXmm(code); + auto const reconstructed = ctx.reg_alloc.ScratchXmm(code); code.movdqa(dest, src); code.pxor(xmm0, xmm0); @@ -4649,9 +4934,9 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned64(EmitContext& ctx, IR:: static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const data = ctx.reg_alloc.UseXmm(code, args[0]); + auto const zero = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Address mask = [esize, &code] { switch (esize) { case 8: @@ -4667,7 +4952,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo } }(); - const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const auto& y) { + const auto vector_equality = [esize, &code](auto const& x, const auto& y) { switch (esize) { case 8: code.pcmpeqb(x, y); @@ -4812,33 +5097,23 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* i EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft); } -template> +template static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray& dst, const VectorArray& data, u8 shift_amount) { + using U = std::make_unsigned_t; static_assert(std::is_signed_v, "T must be signed."); - bool qc_flag = false; for (size_t i = 0; i < dst.size(); i++) { - const T element = data[i]; - const T shift = static_cast(shift_amount); - - if (element == 0) { - dst[i] = 0; - } else if (element < 0) { - dst[i] = 0; - qc_flag = true; - } else { - const U shifted = static_cast(element) << static_cast(shift); - const U shifted_test = shifted >> static_cast(shift); - - if (shifted_test != static_cast(element)) { - dst[i] = static_cast((std::numeric_limits::max)()); - qc_flag = true; - } else { - dst[i] = shifted; - } - } + auto const element = data[i]; + auto const shifted = U(element) << U(T(shift_amount)); + auto const shifted_test = shifted >> U(T(shift_amount)); + auto result = 0; + if (element > 0 && shifted_test != U(element)) + result = T((std::numeric_limits::max)()); + if (element > 0 && shifted_test == U(element)) + result = shifted; + qc_flag |= element < 0 || (element > 0 && shifted_test != U(element)); + dst[i] = result; } - return qc_flag; } @@ -4851,7 +5126,97 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned16(EmitContext& ctx, IR: } void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned32(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const imm8 = args[1].GetImmediateU8(); + if (code.HasHostFeature(HostFeature::AVX2)) { + auto const tmp_flag = ctx.reg_alloc.ScratchGpr(code); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + if (imm8 == 0) { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpshufd(tmp1, tmp0, 85); + code.vpshufd(tmp2, tmp0, 238); + code.vpor(tmp1, tmp1, tmp2); + code.vpshufd(tmp2, tmp0, 255); + code.vpor(tmp2, tmp2, tmp0); + code.vpor(tmp1, tmp1, tmp2); + code.vmovd(tmp_flag.cvt32(), tmp1); + code.shr(tmp_flag.cvt32(), 31); + code.vpxor(tmp1, tmp1, tmp1); + code.vpmaxsd(tmp0, tmp0, tmp1); + } else { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const cmp_value = u32(1ULL << 31) >> (imm8 - 1); + code.vpshufd(tmp1, tmp0, 238); + code.vpor(tmp1, tmp1, tmp0); + code.vpshufd(tmp2, tmp1, 85); + code.vpor(tmp1, tmp1, tmp2); + code.vmovd(tmp_flag.cvt32(), tmp1); + code.cmp(tmp_flag.cvt32(), cmp_value); + code.vpslld(tmp1, tmp0, imm8); + code.vpbroadcastd(tmp2, code.Const(dword, cmp_value - 2)); + code.vpbroadcastd(tmp3, code.Const(dword, cmp_value - 1)); + code.vpcmpgtd(tmp3, tmp0, tmp3); + code.vpcmpeqd(tmp4, tmp4, tmp4); + code.vpaddd(tmp0, tmp0, tmp4); + code.vpminud(tmp2, tmp0, tmp2); + code.vpcmpeqd(tmp0, tmp0, tmp2); + code.vblendvps(tmp0, tmp3, tmp1, tmp0); + code.setae(tmp_flag.cvt8()); + } + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp_flag.cvt8()); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const tmp_flag = ctx.reg_alloc.ScratchGpr(code); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + if (imm8 == 0) { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(tmp1, tmp0, 85); + code.pshufd(tmp2, tmp0, 238); + code.por(tmp2, tmp1); + code.pshufd(tmp1, tmp0, 255); + code.por(tmp1, tmp0); + code.por(tmp1, tmp2); + code.movd(tmp_flag.cvt32(), tmp1); + code.shr(tmp_flag.cvt32(), 31); + code.pxor(tmp1, tmp1); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); + } else { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + u64 const cmp_value = u64(1ULL << 31) >> (imm8 - 1); + u64 const cmp_one = cmp_value - 1; + u64 const cmp_add = (cmp_value - 2) + 0x80000000; + code.pshufd(tmp1, tmp0, 238); + code.por(tmp1, tmp0); + code.pshufd(tmp2, tmp1, 85); + code.por(tmp2, tmp1); + code.movd(tmp_flag.cvt32(), tmp2); + code.cmp(tmp_flag.cvt32(), cmp_value); + code.movdqa(tmp1, tmp0); + code.pslld(tmp1, imm8); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, code.Const(xword, cmp_one | (cmp_one << 32), cmp_one | (cmp_one << 32))); + code.pcmpeqd(tmp3, tmp3); + code.paddd(tmp0, tmp3); + code.pxor(tmp0, code.Const(xword, 0x80000000'80000000, 0x80000000'80000000)); + code.pcmpgtd(tmp0, code.Const(xword, cmp_add | (cmp_add << 32), cmp_add | (cmp_add << 32))); + code.pand(tmp2, tmp0); + code.pandn(tmp0, tmp1); + code.por(tmp0, tmp2); + code.setae(tmp_flag.cvt8()); + } + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp_flag.cvt8()); + ctx.reg_alloc.DefineValue(code, inst, tmp0); +// EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned); + } } void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned64(EmitContext& ctx, IR::Inst* inst) { @@ -4889,7 +5254,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { const bool is_defaults_zero = inst->GetArg(0).IsZero(); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI)) { - const Xbyak::Xmm indicies = table_size <= 2 ? ctx.reg_alloc.UseXmm(code, args[2]) : ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = table_size <= 2 ? ctx.reg_alloc.UseXmm(code, args[2]) : ctx.reg_alloc.UseScratchXmm(code, args[2]); const u64 index_count = mcl::bit::replicate_element(static_cast(table_size * 8)); @@ -4897,43 +5262,43 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { switch (table_size) { case 1: { - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 2: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm0); ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 3: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[2]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm0, xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); @@ -4941,17 +5306,17 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { break; } case 4: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); code.vpunpcklqdq(xmm_table1, xmm_table1, xmm_table1_upper); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm0, xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); @@ -4974,9 +5339,9 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { }; if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.xorps(result, result); code.movsd(result, xmm_table0); @@ -4988,9 +5353,9 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); @@ -5001,12 +5366,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41) && table_size <= 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); if (table_size == 2) { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } @@ -5025,12 +5390,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41) && is_defaults_zero) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } @@ -5039,7 +5404,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { code.punpcklqdq(xmm_table1, xmm0); } else { ASSERT(table_size == 4); - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.punpcklqdq(xmm_table1, xmm_table1_upper); ctx.reg_alloc.Release(xmm_table1_upper); } @@ -5060,18 +5425,18 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } if (table_size == 4) { - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.punpcklqdq(xmm_table1, xmm_table1_upper); ctx.reg_alloc.Release(xmm_table1_upper); } @@ -5100,37 +5465,31 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { const u32 stack_space = static_cast(6 * 8); ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(code, table[i]); + auto const table_value = ctx.reg_alloc.UseXmm(code, table[i]); code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value); ctx.reg_alloc.Release(table_value); } - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); - code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 4 * 8]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 5 * 8]); code.mov(code.ABI_PARAM4.cvt32(), table_size); code.movq(qword[code.ABI_PARAM2], defaults); code.movq(qword[code.ABI_PARAM3], indicies); - - code.CallLambda( - [](const HalfVectorArray* table, HalfVectorArray& result, const HalfVectorArray& indicies, size_t table_size) { - for (size_t i = 0; i < result.size(); ++i) { - const size_t index = indicies[i] / table[0].size(); - const size_t elem = indicies[i] % table[0].size(); - if (index < table_size) { - result[i] = table[index][elem]; - } - } - }); - + code.CallLambda([](const HalfVectorArray* table, HalfVectorArray& result, const HalfVectorArray& indicies, size_t table_size) { + for (size_t i = 0; i < result.size(); ++i) { + const size_t index = indicies[i] / table[0].size(); + const size_t elem = indicies[i] % table[0].size(); + if (index < table_size) + result[i] = table[index][elem]; + } + }); code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]); ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(code, inst, result); } @@ -5144,14 +5503,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector; if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 4) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); code.vpcmpub(k2, indicies, code.BConst<8>(xword, 4 * 16), CmpInt::LessThan); // Handle vector-table 0,1 - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpermi2b(indicies | k1, xmm_table0, xmm_table1); @@ -5159,8 +5518,8 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.Release(xmm_table1); // Handle vector-table 2,3 - const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); - const Xbyak::Xmm xmm_table3 = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table3 = ctx.reg_alloc.UseXmm(code, table[3]); code.kandnw(k1, k1, k2); code.vpermi2b(indicies | k1, xmm_table2, xmm_table3); @@ -5169,19 +5528,19 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vmovdqu8(indicies | k2 | T_z, indicies); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vmovdqu8(defaults | k2, indicies); ctx.reg_alloc.DefineValue(code, inst, defaults); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 3) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); code.vpcmpub(k2, indicies, code.BConst<8>(xword, 3 * 16), CmpInt::LessThan); // Handle vector-table 0,1 - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpermi2b(indicies | k1, xmm_table0, xmm_table1); @@ -5189,7 +5548,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.Release(xmm_table1); // Handle vector-table 2 - const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); code.kandnw(k1, k1, k2); code.vpermb(indicies | k1, indicies, xmm_table2); @@ -5198,14 +5557,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vmovdqu8(indicies | k2 | T_z, indicies); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vmovdqu8(defaults | k2, indicies); ctx.reg_alloc.DefineValue(code, inst, defaults); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); @@ -5213,36 +5572,36 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vpermi2b(indicies | k1 | T_z, xmm_table0, xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm_table0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 1 * 16), CmpInt::LessThan); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } } else if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); code.pshufb(xmm_table0, indicies); ctx.reg_alloc.DefineValue(code, inst, xmm_table0); } else if (code.HasHostFeature(HostFeature::SSE41) && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpaddusb(xmm0, indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); @@ -5255,9 +5614,9 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_table0); } else if (code.HasHostFeature(HostFeature::SSE41) && is_defaults_zero && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[1]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpaddusb(xmm0, indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); @@ -5273,14 +5632,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const masked = ctx.reg_alloc.ScratchXmm(code); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); + auto const xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); const Xbyak::Opmask table_mask = k1; const u64 table_index = mcl::bit::replicate_element(i * 16); @@ -5297,15 +5656,15 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const masked = ctx.reg_alloc.ScratchXmm(code); code.movaps(masked, code.Const(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.pand(masked, indicies); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); + auto const xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); const u64 table_index = mcl::bit::replicate_element(i * 16); @@ -5329,13 +5688,13 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { const u32 stack_space = static_cast((table_size + 2) * 16); ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(code, table[i]); + auto const table_value = ctx.reg_alloc.UseXmm(code, table[i]); code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value); ctx.reg_alloc.Release(table_value); } - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); @@ -5362,8 +5721,8 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); if (!part) { @@ -5381,8 +5740,8 @@ void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); if (!part) { @@ -5400,8 +5759,8 @@ void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); code.shufps(lower, upper, !part ? 0b10001000 : 0b11011101); @@ -5413,8 +5772,8 @@ void EmitX64::EmitVectorTranspose32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); code.shufpd(lower, upper, !part ? 0b00 : 0b11); @@ -5422,89 +5781,87 @@ void EmitX64::EmitVectorTranspose64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, lower); } -static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { + +void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - - switch (esize) { - case 8: { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminub(tmp2, tmp0, tmp1); + code.vpmaxub(tmp0, tmp0, tmp1); + code.vpsubb(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(temp, x); code.psubusb(temp, y); code.psubusb(y, x); code.por(temp, y); - break; + ctx.reg_alloc.DefineValue(code, inst, temp); } - case 16: { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); +} +void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminuw(tmp2, tmp0, tmp1); + code.vpmaxuw(tmp0, tmp0, tmp1); + code.vpsubw(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(temp, x); code.psubusw(temp, y); code.psubusw(y, x); code.por(temp, y); - break; + ctx.reg_alloc.DefineValue(code, inst, temp); } - case 32: - // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 - if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - - code.movdqa(temp, x); - code.pminud(x, y); - code.pmaxud(temp, y); - code.psubd(temp, x); - } else { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - if (ctx.HasOptimization(OptimizationFlag::CodeSpeed)) { - // About 45 bytes - const Xbyak::Xmm temp_x = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm temp_y = ctx.reg_alloc.ScratchXmm(code); - code.pcmpeqd(temp, temp); - code.pslld(temp, 31); - code.movdqa(temp_x, x); - code.movdqa(temp_y, y); - code.paddd(temp_x, x); - code.paddd(temp_y, y); - code.pcmpgtd(temp_y, temp_x); - code.psubd(x, y); - code.pandn(temp, temp_y); - code.pxor(x, y); - code.psubd(x, y); - } else { - // Smaller code size - about 36 bytes - code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); - code.pxor(x, temp); - code.pxor(y, temp); - code.movdqa(temp, x); - code.psubd(temp, y); - code.pcmpgtd(y, x); - code.psrld(y, 1); - code.pxor(temp, y); - code.psubd(temp, y); - } - } - break; - } - - ctx.reg_alloc.DefineValue(code, inst, temp); -} - -void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { - EmitVectorUnsignedAbsoluteDifference(8, ctx, inst, code); -} - -void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) { - EmitVectorUnsignedAbsoluteDifference(16, ctx, inst, code); } void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) { - EmitVectorUnsignedAbsoluteDifference(32, ctx, inst, code); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminud(tmp2, tmp0, tmp1); + code.vpmaxud(tmp0, tmp0, tmp1); + code.vpsubd(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else if (code.HasHostFeature(HostFeature::SSE41)) { + // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 + auto const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + code.movdqa(temp, x); + code.pminud(x, y); + code.pmaxud(temp, y); + code.psubd(temp, x); + ctx.reg_alloc.DefineValue(code, inst, temp); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp3); + code.psubd(tmp0, tmp1); + code.pxor(tmp0, tmp2); + code.psubd(tmp2, tmp0); + //code.movdqa(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); + } } void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { @@ -5512,11 +5869,11 @@ void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); if (upper_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhuw(result, x, y); } else { @@ -5528,7 +5885,7 @@ void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { } if (lower_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmullw(result, x, y); } else { @@ -5546,24 +5903,24 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (lower_inst && !upper_inst && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(result, x, y); ctx.reg_alloc.DefineValue(code, lower_inst, result); } else if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(lower_result, x, y); ctx.reg_alloc.DefineValue(code, lower_inst, lower_result); } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmuludq(result, x, y); code.vpsrlq(x, x, 32); @@ -5573,11 +5930,11 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, upper_inst, result); } else { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; - const Xbyak::Xmm lower_result = lower_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; + auto const lower_result = lower_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; // calculate unsigned multiply code.movdqa(tmp, x); @@ -5794,11 +6151,11 @@ void EmitX64::EmitVectorUnsignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxbw(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklbw(a, zeros); } @@ -5807,11 +6164,11 @@ void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxwd(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklwd(a, zeros); } @@ -5820,11 +6177,11 @@ void EmitX64::EmitVectorZeroExtend16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxdq(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpckldq(a, zeros); } @@ -5833,8 +6190,8 @@ void EmitX64::EmitVectorZeroExtend32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklqdq(a, zeros); ctx.reg_alloc.DefineValue(code, inst, a); @@ -5842,7 +6199,7 @@ void EmitX64::EmitVectorZeroExtend64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.movq(a, a); // TODO: !IsLastUse @@ -5850,7 +6207,7 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Xmm a = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.ScratchXmm(code); code.pxor(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index 2247b18fcd..046ecc78d6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -13,14 +13,8 @@ #include #include "dynarmic/common/assert.h" -#include -#include -#include -#include -#include -#include -#include -#include +#include "dynarmic/mcl/function_info.hpp" +#include "dynarmic/mcl/integer_of_size.hpp" #include "dynarmic/backend/x64/xbyak.h" #include "dynarmic/backend/x64/abi.h" @@ -30,8 +24,8 @@ #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/op.h" +#include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/common/fp/util.h" -#include "dynarmic/common/lut_from_list.h" #include "dynarmic/interface/optimization_flags.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" @@ -42,7 +36,6 @@ namespace Dynarmic::Backend::X64 { using namespace Xbyak::util; -namespace mp = mcl::mp; namespace { @@ -101,7 +94,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std:: code.cmp(bitmask, 0); } - SharedLabel end = GenSharedLabel(), nan = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel(); code.jnz(*nan, code.T_NEAR); code.L(*end); @@ -196,23 +189,6 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) { } } -template -void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) { - const Xbyak::Xmm nan_mask = xmm0; - if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { - constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, - FpFixup::PosZero); - FCODE(vfixupimmp)(result, result, code.BConst<32>(ptr_b, nan_to_zero), u8(0)); - } else if (code.HasHostFeature(HostFeature::AVX)) { - FCODE(vcmpordp)(nan_mask, result, result); - FCODE(vandp)(result, result, nan_mask); - } else { - code.movaps(nan_mask, result); - FCODE(cmpordp)(nan_mask, nan_mask); - code.andps(result, nan_mask); - } -} - template void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list to_daz, Xbyak::Xmm tmp) { if (fpcr.FZ()) { @@ -1338,7 +1314,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.movaps(result, xmm_a); @@ -1611,7 +1587,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.movaps(result, GetVectorOf(code)); @@ -1784,7 +1760,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code); - SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel bad_values = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); code.movaps(value, operand); @@ -1875,7 +1851,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.vmovaps(result, GetVectorOf(code)); @@ -2001,6 +1977,7 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) { } } + // Assembly thunk; just remember not to specialise too much otherwise i-cache death! // template // static void EmitFPVectorToFixedThunk(VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { @@ -2011,144 +1988,161 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) { template void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const size_t fbits = inst->GetArg(1).GetU8(); - const auto rounding = static_cast(inst->GetArg(2).GetU8()); + const auto rounding = FP::RoundingMode(inst->GetArg(2).GetU8()); [[maybe_unused]] const bool fpcr_controlled = inst->GetArg(3).GetU1(); - if constexpr (fsize != 16) { - if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]); - - MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { - const int round_imm = [&] { - switch (rounding) { - case FP::RoundingMode::ToNearest_TieEven: - default: - return 0b00; - case FP::RoundingMode::TowardsPlusInfinity: - return 0b10; - case FP::RoundingMode::TowardsMinusInfinity: - return 0b01; - case FP::RoundingMode::TowardsZero: - return 0b11; - } - }(); - - const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) { - // MSVC doesn't allow us to use a [&] capture, so we have to do this instead. - (void)ctx; - - if constexpr (fsize == 32) { - code.cvttps2dq(src, src); - } else { - if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { - code.vcvttpd2qq(src, src); - } else { - const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code); - const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code); - - code.cvttsd2si(lo, src); - code.punpckhqdq(src, src); - code.cvttsd2si(hi, src); - code.movq(src, lo); - code.pinsrq(src, hi, 1); - - ctx.reg_alloc.Release(hi); - ctx.reg_alloc.Release(lo); - } - } - }; - - if (fbits != 0) { - const u64 scale_factor = fsize == 32 - ? static_cast(fbits + 127) << 23 - : static_cast(fbits + 1023) << 52; - FCODE(mulp)(src, GetVectorOf(code, scale_factor)); + if (code.HasHostFeature(HostFeature::SSE41) && fsize != 16 && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]); + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { + const int round_imm = [&] { + switch (rounding) { + case FP::RoundingMode::ToNearest_TieEven: + default: + return 0b00; + case FP::RoundingMode::TowardsPlusInfinity: + return 0b10; + case FP::RoundingMode::TowardsMinusInfinity: + return 0b01; + case FP::RoundingMode::TowardsZero: + return 0b11; } + }(); + const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) { + // MSVC doesn't allow us to use a [&] capture, so we have to do this instead. + (void)ctx; - FCODE(roundp)(src, src, static_cast(round_imm)); - ZeroIfNaN(code, src); - - constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000; - [[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000; - - if constexpr (unsigned_) { + if constexpr (fsize == 32) { + code.cvttps2dq(src, src); + } else { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { - // Mask positive values - code.xorps(xmm0, xmm0); - FCODE(vcmpp)(k1, src, xmm0, Cmp::GreaterEqual_OQ); - - // Convert positive values to unsigned integers, write 0 anywhere else - // vcvttp*2u*q already saturates out-of-range values to (0xFFFF...) - if constexpr (fsize == 32) { - code.vcvttps2udq(src | k1 | T_z, src); - } else { - code.vcvttpd2uqq(src | k1 | T_z, src); - } + code.vcvttpd2qq(src, src); } else { - // Zero is minimum - code.xorps(xmm0, xmm0); - FCODE(cmplep)(xmm0, src); - FCODE(andp)(src, xmm0); + const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code); - // Will we exceed unsigned range? - const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code); - code.movaps(exceed_unsigned, GetVectorOf(code)); - FCODE(cmplep)(exceed_unsigned, src); + code.cvttsd2si(lo, src); + code.punpckhqdq(src, src); + code.cvttsd2si(hi, src); + code.movq(src, lo); + code.pinsrq(src, hi, 1); - // Will be exceed signed range? - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - code.movaps(tmp, GetVectorOf(code)); - code.movaps(xmm0, tmp); - FCODE(cmplep)(xmm0, src); - FCODE(andp)(tmp, xmm0); - FCODE(subp)(src, tmp); - perform_conversion(src); - ICODE(psll)(xmm0, u8(fsize - 1)); - FCODE(orp)(src, xmm0); + ctx.reg_alloc.Release(hi); + ctx.reg_alloc.Release(lo); + } + } + }; + if (fbits != 0) { + const u64 scale_factor = fsize == 32 + ? u64(fbits + 127) << 23 + : u64(fbits + 1023) << 52; + FCODE(mulp)(src, GetVectorOf(code, scale_factor)); + } - // Saturate to max - FCODE(orp)(src, exceed_unsigned); + FCODE(roundp)(src, src, u8(round_imm)); + const Xbyak::Xmm nan_mask = xmm0; + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + static constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, FpFixup::PosZero); + FCODE(vfixupimmp)(src, src, code.BConst<32>(ptr_b, nan_to_zero), u8(0)); + } else if (code.HasHostFeature(HostFeature::AVX)) { + FCODE(vcmpordp)(nan_mask, src, src); + FCODE(vandp)(src, src, nan_mask); + } else { + code.movaps(nan_mask, src); + FCODE(cmpordp)(nan_mask, nan_mask); + code.andps(src, nan_mask); + } + + constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000; + [[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000; + + if constexpr (unsigned_) { + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + // Mask positive values + code.xorps(xmm0, xmm0); + FCODE(vcmpp)(k1, src, xmm0, Cmp::GreaterEqual_OQ); + + // Convert positive values to unsigned integers, write 0 anywhere else + // vcvttp*2u*q already saturates out-of-range values to (0xFFFF...) + if (fsize == 32) { + code.vcvttps2udq(src | k1 | T_z, src); + } else { + code.vcvttpd2uqq(src | k1 | T_z, src); } } else { - using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC - constexpr u64 integer_max = FPT((std::numeric_limits>>::max)()); - - code.movaps(xmm0, GetVectorOf(code)); + // Zero is minimum + code.xorps(xmm0, xmm0); FCODE(cmplep)(xmm0, src); - perform_conversion(src); - FCODE(blendvp)(src, GetVectorOf(code)); - } - }); + FCODE(andp)(src, xmm0); - ctx.reg_alloc.DefineValue(code, inst, src); - return; - } + // Will we exceed unsigned range? + const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code); + code.movaps(exceed_unsigned, GetVectorOf(code)); + FCODE(cmplep)(exceed_unsigned, src); + + // Will be exceed signed range? + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + code.movaps(tmp, GetVectorOf(code)); + code.movaps(xmm0, tmp); + FCODE(cmplep)(xmm0, src); + FCODE(andp)(tmp, xmm0); + FCODE(subp)(src, tmp); + perform_conversion(src); + ICODE(psll)(xmm0, u8(fsize - 1)); + FCODE(orp)(src, xmm0); + + // Saturate to max + FCODE(orp)(src, exceed_unsigned); + } + } else { + using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC + constexpr u64 integer_max = FPT((std::numeric_limits>>::max)()); + code.movaps(xmm0, GetVectorOf(code)); + FCODE(cmplep)(xmm0, src); + perform_conversion(src); + FCODE(blendvp)(src, GetVectorOf(code)); + } + }); + ctx.reg_alloc.DefineValue(code, inst, src); + return; } - using fbits_list = mp::lift_sequence>; - using rounding_list = mp::list< - mp::lift_value, - mp::lift_value, - mp::lift_value, - mp::lift_value, - mp::lift_value>; - - static const auto lut = Common::GenerateLookupTableFromList([](I) { - using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC - return std::pair{ - mp::lower_to_tuple_v, - Common::FptrCast([](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { - constexpr size_t fbits = mp::get<0, I>::value; - constexpr FP::RoundingMode rounding_mode = mp::get<1, I>::value; + using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC + auto const func = [rounding]() -> void(*)(VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + switch (rounding) { + case FP::RoundingMode::ToNearest_TieEven: + return [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { for (size_t i = 0; i < output.size(); ++i) - output[i] = FPT(FP::FPToFixed(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr)); - }) - }; - }, mp::cartesian_product{}); - - EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding))); + output[i] = FPT(FP::FPToFixed(fsize, input[i], fsize, unsigned_, fpcr, FP::RoundingMode::ToNearest_TieEven, fpsr)); + }; + case FP::RoundingMode::TowardsPlusInfinity: + return [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + for (size_t i = 0; i < output.size(); ++i) + output[i] = FPT(FP::FPToFixed(fsize, input[i], fsize, unsigned_, fpcr, FP::RoundingMode::TowardsPlusInfinity, fpsr)); + }; + case FP::RoundingMode::TowardsMinusInfinity: + return [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + for (size_t i = 0; i < output.size(); ++i) + output[i] = FPT(FP::FPToFixed(fsize, input[i], fsize, unsigned_, fpcr, FP::RoundingMode::TowardsMinusInfinity, fpsr)); + }; + case FP::RoundingMode::TowardsZero: + return [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + for (size_t i = 0; i < output.size(); ++i) + output[i] = FPT(FP::FPToFixed(fsize, input[i], fsize, unsigned_, fpcr, FP::RoundingMode::TowardsZero, fpsr)); + }; + case FP::RoundingMode::ToNearest_TieAwayFromZero: + return [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + for (size_t i = 0; i < output.size(); ++i) + output[i] = FPT(FP::FPToFixed(fsize, input[i], fsize, unsigned_, fpcr, FP::RoundingMode::ToNearest_TieAwayFromZero, fpsr)); + }; + case FP::RoundingMode::ToOdd: + return [](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { + for (size_t i = 0; i < output.size(); ++i) + output[i] = FPT(FP::FPToFixed(fsize, input[i], fsize, unsigned_, fpcr, FP::RoundingMode::ToOdd, fpsr)); + }; + } + }(); + EmitTwoOpFallback<3>(code, ctx, inst, func); } void EmitX64::EmitFPVectorToSignedFixed16(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/dynarmic/src/dynarmic/backend/x64/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/x64/exception_handler_windows.cpp index 3ae553bccd..bae397ff2b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/exception_handler_windows.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -176,7 +176,7 @@ struct ExceptionHandler::Impl final { code.align(16); const u8* exception_handler_without_cb = code.getCurr(); - code.mov(code.eax, static_cast(ExceptionContinueSearch)); + code.mov(code.eax, u32(ExceptionContinueSearch)); code.ret(); code.align(16); @@ -192,20 +192,18 @@ struct ExceptionHandler::Impl final { code.lea(code.rsp, code.ptr[code.rsp - 8]); code.mov(code.ABI_PARAM1, std::bit_cast(&cb)); code.mov(code.ABI_PARAM2, code.ABI_PARAM3); - code.CallLambda( - [](const std::function& cb_, PCONTEXT ctx) { - FakeCall fc = cb_(ctx->Rip); - - ctx->Rsp -= sizeof(u64); - *std::bit_cast(ctx->Rsp) = fc.ret_rip; - ctx->Rip = fc.call_rip; - }); + code.CallLambda([](const std::function& cb_, PCONTEXT ctx) { + FakeCall fc = cb_(ctx->Rip); + ctx->Rsp -= sizeof(u64); + *std::bit_cast(ctx->Rsp) = fc.ret_rip; + ctx->Rip = fc.call_rip; + }); code.add(code.rsp, 8); - code.mov(code.eax, static_cast(ExceptionContinueExecution)); + code.mov(code.eax, u32(ExceptionContinueExecution)); code.ret(); - exception_handler_without_cb_offset = static_cast(exception_handler_without_cb - code.getCode()); - exception_handler_with_cb_offset = static_cast(exception_handler_with_cb - code.getCode()); + exception_handler_without_cb_offset = ULONG(exception_handler_without_cb - code.getCode()); + exception_handler_with_cb_offset = ULONG(exception_handler_with_cb - code.getCode()); code.align(16); UNWIND_INFO* unwind_info = static_cast(code.AllocateFromCodeSpace(sizeof(UNWIND_INFO))); diff --git a/src/dynarmic/src/dynarmic/backend/x64/perf_map.cpp b/src/dynarmic/src/dynarmic/backend/x64/perf_map.cpp index a0b5c4fdde..095d6194f6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/perf_map.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/perf_map.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -6,17 +6,16 @@ * SPDX-License-Identifier: 0BSD */ -#include "dynarmic/backend/x64/perf_map.h" - #include #include +#include +#include "dynarmic/backend/x64/perf_map.h" +#include "dynarmic/common/common_types.h" #if defined(__linux__) && !defined(__ANDROID__) # include # include # include -# include -# include # include # include diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 5c5ed25131..2cfa14ae18 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -417,7 +417,8 @@ HostLoc RegAlloc::SelectARegister(std::bitset<32> desired_locations) const noexc // While R13 and R14 are technically available, we avoid allocating for them // at all costs, because theoretically skipping them is better than spilling // all over the place - i also fixes bugs with high reg pressure - } else if (i >= HostLoc::R13 && i <= HostLoc::R15) { + // %rbp must not be trashed, so skip it as well + } else if (i == HostLoc::RBP || (i >= HostLoc::R13 && i <= HostLoc::R15)) { // skip, do not touch // Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL) } else if (loc_info.IsEmpty()) { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 8b872a0e9c..746d6b723f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -50,7 +50,7 @@ public: } inline void ReadLock() noexcept { ASSERT(size_t(is_being_used_count) + 1 < (std::numeric_limits::max)()); - ASSERT(!is_scratch); + ASSERT(!bool(is_scratch)); is_being_used_count++; } inline void WriteLock() noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/stack_layout.h b/src/dynarmic/src/dynarmic/backend/x64/stack_layout.h index 50737f12eb..43a3fc7ab2 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/stack_layout.h +++ b/src/dynarmic/src/dynarmic/backend/x64/stack_layout.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -22,14 +22,13 @@ constexpr size_t SpillCount = 64; #endif struct alignas(16) StackLayout { + // Needs alignment for VMOV and XMM spills + alignas(16) std::array, SpillCount> spill; s64 cycles_remaining; s64 cycles_to_run; - - std::array, SpillCount> spill; - u32 save_host_MXCSR; - bool check_bit; + u64 abi_base_pointer; }; #ifdef _MSC_VER diff --git a/src/dynarmic/src/dynarmic/common/cast_util.h b/src/dynarmic/src/dynarmic/common/cast_util.h index 92c9a259b3..aed9067977 100644 --- a/src/dynarmic/src/dynarmic/common/cast_util.h +++ b/src/dynarmic/src/dynarmic/common/cast_util.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD @@ -5,7 +8,7 @@ #pragma once -#include +#include "dynarmic/mcl/function_info.hpp" namespace Dynarmic::Common { diff --git a/src/dynarmic/src/dynarmic/common/fp/fpcr.h b/src/dynarmic/src/dynarmic/common/fp/fpcr.h index be963a2099..948917bc35 100644 --- a/src/dynarmic/src/dynarmic/common/fp/fpcr.h +++ b/src/dynarmic/src/dynarmic/common/fp/fpcr.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -11,7 +11,7 @@ #include #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/rounding_mode.h" diff --git a/src/dynarmic/src/dynarmic/common/fp/fpsr.h b/src/dynarmic/src/dynarmic/common/fp/fpsr.h index 9308132879..caa5cb92c7 100644 --- a/src/dynarmic/src/dynarmic/common/fp/fpsr.h +++ b/src/dynarmic/src/dynarmic/common/fp/fpsr.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,7 +8,7 @@ #pragma once -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" namespace Dynarmic::FP { diff --git a/src/dynarmic/src/dynarmic/common/fp/fused.cpp b/src/dynarmic/src/dynarmic/common/fp/fused.cpp index d1197b4a42..d16eb44791 100644 --- a/src/dynarmic/src/dynarmic/common/fp/fused.cpp +++ b/src/dynarmic/src/dynarmic/common/fp/fused.cpp @@ -8,7 +8,7 @@ #include "dynarmic/common/fp/fused.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/fp/unpacked.h" #include "dynarmic/common/u128.h" diff --git a/src/dynarmic/src/dynarmic/common/fp/info.h b/src/dynarmic/src/dynarmic/common/fp/info.h index 3969843f0f..eebca0fc0c 100644 --- a/src/dynarmic/src/dynarmic/common/fp/info.h +++ b/src/dynarmic/src/dynarmic/common/fp/info.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,7 +8,7 @@ #pragma once -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" namespace Dynarmic::FP { diff --git a/src/dynarmic/src/dynarmic/common/fp/mantissa_util.h b/src/dynarmic/src/dynarmic/common/fp/mantissa_util.h index 3999f97c27..43bb5fe604 100644 --- a/src/dynarmic/src/dynarmic/common/fp/mantissa_util.h +++ b/src/dynarmic/src/dynarmic/common/fp/mantissa_util.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,8 +8,7 @@ #pragma once -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" namespace Dynarmic::FP { diff --git a/src/dynarmic/src/dynarmic/common/fp/op/FPConvert.cpp b/src/dynarmic/src/dynarmic/common/fp/op/FPConvert.cpp index 906aa781a1..82803f715d 100644 --- a/src/dynarmic/src/dynarmic/common/fp/op/FPConvert.cpp +++ b/src/dynarmic/src/dynarmic/common/fp/op/FPConvert.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,8 +8,7 @@ #include "dynarmic/common/fp/op/FPConvert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/fpcr.h" diff --git a/src/dynarmic/src/dynarmic/common/fp/op/FPRecipExponent.cpp b/src/dynarmic/src/dynarmic/common/fp/op/FPRecipExponent.cpp index e932e02803..332870eb8a 100644 --- a/src/dynarmic/src/dynarmic/common/fp/op/FPRecipExponent.cpp +++ b/src/dynarmic/src/dynarmic/common/fp/op/FPRecipExponent.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,7 +8,7 @@ #include "dynarmic/common/fp/op/FPRecipExponent.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/fpcr.h" diff --git a/src/dynarmic/src/dynarmic/common/fp/op/FPRoundInt.cpp b/src/dynarmic/src/dynarmic/common/fp/op/FPRoundInt.cpp index 198a71c807..4bcfcd7c8a 100644 --- a/src/dynarmic/src/dynarmic/common/fp/op/FPRoundInt.cpp +++ b/src/dynarmic/src/dynarmic/common/fp/op/FPRoundInt.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -9,7 +9,7 @@ #include "dynarmic/common/fp/op/FPRoundInt.h" #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/fpcr.h" diff --git a/src/dynarmic/src/dynarmic/common/fp/op/FPToFixed.cpp b/src/dynarmic/src/dynarmic/common/fp/op/FPToFixed.cpp index 8e3474952a..2f37797b70 100644 --- a/src/dynarmic/src/dynarmic/common/fp/op/FPToFixed.cpp +++ b/src/dynarmic/src/dynarmic/common/fp/op/FPToFixed.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,8 +10,7 @@ #include #include "dynarmic/common/assert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/fpcr.h" diff --git a/src/dynarmic/src/dynarmic/common/fp/process_nan.cpp b/src/dynarmic/src/dynarmic/common/fp/process_nan.cpp index ff1b09f4ba..7f47852d98 100644 --- a/src/dynarmic/src/dynarmic/common/fp/process_nan.cpp +++ b/src/dynarmic/src/dynarmic/common/fp/process_nan.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -11,7 +11,7 @@ #include #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" diff --git a/src/dynarmic/src/dynarmic/common/fp/unpacked.cpp b/src/dynarmic/src/dynarmic/common/fp/unpacked.cpp index f853ab07e1..d6bb615cb4 100644 --- a/src/dynarmic/src/dynarmic/common/fp/unpacked.cpp +++ b/src/dynarmic/src/dynarmic/common/fp/unpacked.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD @@ -7,8 +10,7 @@ #include -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/info.h" diff --git a/src/dynarmic/src/dynarmic/common/fp/unpacked.h b/src/dynarmic/src/dynarmic/common/fp/unpacked.h index 49dca74304..effc604fb0 100644 --- a/src/dynarmic/src/dynarmic/common/fp/unpacked.h +++ b/src/dynarmic/src/dynarmic/common/fp/unpacked.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,7 +10,7 @@ #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/fpcr.h" diff --git a/src/dynarmic/src/dynarmic/common/lut_from_list.h b/src/dynarmic/src/dynarmic/common/lut_from_list.h deleted file mode 100644 index 633b62aeda..0000000000 --- a/src/dynarmic/src/dynarmic/common/lut_from_list.h +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include -#include - -#include -#include -#include - -#ifdef _MSC_VER -# include -#endif - -namespace Dynarmic::Common { - -// prevents this function from printing 56,000 character warning messages -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wno-stack-usage" -#endif -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wno-stack-usage" -#endif - -template -inline auto GenerateLookupTableFromList(Function f, mcl::mp::list) { -#ifdef _MSC_VER - using PairT = std::invoke_result_t>>; -#else - using PairT = std::common_type_t...>; -#endif - using MapT = mcl::mp::apply; - static_assert(mcl::is_instance_of_template_v); - const std::initializer_list pair_array{f(Values{})...}; - return MapT(pair_array.begin(), pair_array.end()); -} - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/safe_ops.h b/src/dynarmic/src/dynarmic/common/safe_ops.h index f494bdb24b..5cc8cb5049 100644 --- a/src/dynarmic/src/dynarmic/common/safe_ops.h +++ b/src/dynarmic/src/dynarmic/common/safe_ops.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,7 +10,7 @@ #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/u128.h" diff --git a/src/dynarmic/src/dynarmic/common/u128.h b/src/dynarmic/src/dynarmic/common/u128.h index 4e4bb3cf10..4fa842b3cd 100644 --- a/src/dynarmic/src/dynarmic/common/u128.h +++ b/src/dynarmic/src/dynarmic/common/u128.h @@ -11,8 +11,7 @@ #include #include -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" namespace Dynarmic { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/FPSCR.h b/src/dynarmic/src/dynarmic/frontend/A32/FPSCR.h index 7a4c738e9e..7e0532ee93 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/FPSCR.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/FPSCR.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,7 +10,7 @@ #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/rounding_mode.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/ITState.h b/src/dynarmic/src/dynarmic/frontend/A32/ITState.h index eeddaeb6b5..baabc1ca15 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/ITState.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/ITState.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,7 +8,7 @@ #pragma once -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/ir/cond.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/PSR.h b/src/dynarmic/src/dynarmic/frontend/A32/PSR.h index 328fdb17c0..16ca86aeac 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/PSR.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/PSR.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,7 +8,7 @@ #pragma once -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/frontend/A32/ITState.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.cpp b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.cpp index a47ce0b78b..704179ecfb 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD @@ -8,7 +11,7 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" namespace Dynarmic::A32 { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index c6f034ae21..1e3d368187 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -15,7 +15,7 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/frontend/decoder/decoder_detail.h" @@ -36,25 +36,19 @@ inline size_t ToFastLookupIndexArm(u32 instruction) noexcept { } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() noexcept { - std::vector> list = { +static ArmDecodeTable GetArmDecodeTable() noexcept { + ArmDecodeTable table{}; + for (size_t i = 0; i < table.size(); ++i) { + // PLEASE HEAP ELLIDE + for (auto const& e : std::vector>{ #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" #undef INST - }; - - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); - }); - - ArmDecodeTable table{}; - for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndexArm(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndexArm(matcher.GetMask()); + }) { + auto const expect = detail::ToFastLookupIndexArm(e.GetExpected()); + auto const mask = detail::ToFastLookupIndexArm(e.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e); } } } @@ -62,7 +56,7 @@ constexpr ArmDecodeTable GetArmDecodeTable() noexcept { } template -std::optional>> DecodeArm(u32 instruction) noexcept { +static std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); @@ -73,7 +67,7 @@ std::optional>> DecodeArm(u32 instruc } template -std::optional GetNameARM(u32 inst) noexcept { +static std::optional GetNameARM(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./arm.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.inc b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.inc index 90995c4c05..07f7ce0154 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.inc +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.inc @@ -1,316 +1,265 @@ -// Barrier instructions -INST(arm_DMB, "DMB", "1111010101111111111100000101oooo") // v7 -INST(arm_DSB, "DSB", "1111010101111111111100000100oooo") // v7 -INST(arm_ISB, "ISB", "1111010101111111111100000110oooo") // v7 +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// DO NOT REORDER -// Branch instructions -INST(arm_BLX_imm, "BLX (imm)", "1111101hvvvvvvvvvvvvvvvvvvvvvvvv") // v5 -INST(arm_BLX_reg, "BLX (reg)", "cccc000100101111111111110011mmmm") // v5 -INST(arm_B, "B", "cccc1010vvvvvvvvvvvvvvvvvvvvvvvv") // v1 -INST(arm_BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") // v1 -INST(arm_BX, "BX", "cccc000100101111111111110001mmmm") // v4T -INST(arm_BXJ, "BXJ", "cccc000100101111111111110010mmmm") // v5J - -// CRC32 instructions -INST(arm_CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") // v8 -INST(arm_CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") // v8 - -// Coprocessor instructions -INST(arm_CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2 (CDP2: v5) -INST(arm_LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2 (LDC2: v5) -INST(arm_MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") // v2 (MCR2: v5) -INST(arm_MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6) -INST(arm_MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") // v2 (MRC2: v5) -INST(arm_MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6) -INST(arm_STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2 (STC2: v5) - -// Data Processing instructions -INST(arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_ADC_reg, "ADC (reg)", "cccc0000101Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_ADC_rsr, "ADC (rsr)", "cccc0000101Snnnnddddssss0rr1mmmm") // v1 -INST(arm_ADD_imm, "ADD (imm)", "cccc0010100Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_ADD_reg, "ADD (reg)", "cccc0000100Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_ADD_rsr, "ADD (rsr)", "cccc0000100Snnnnddddssss0rr1mmmm") // v1 -INST(arm_AND_imm, "AND (imm)", "cccc0010000Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_AND_reg, "AND (reg)", "cccc0000000Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_AND_rsr, "AND (rsr)", "cccc0000000Snnnnddddssss0rr1mmmm") // v1 -INST(arm_BIC_imm, "BIC (imm)", "cccc0011110Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_BIC_reg, "BIC (reg)", "cccc0001110Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_BIC_rsr, "BIC (rsr)", "cccc0001110Snnnnddddssss0rr1mmmm") // v1 -INST(arm_CMN_imm, "CMN (imm)", "cccc00110111nnnn0000rrrrvvvvvvvv") // v1 -INST(arm_CMN_reg, "CMN (reg)", "cccc00010111nnnn0000vvvvvrr0mmmm") // v1 -INST(arm_CMN_rsr, "CMN (rsr)", "cccc00010111nnnn0000ssss0rr1mmmm") // v1 -INST(arm_CMP_imm, "CMP (imm)", "cccc00110101nnnn0000rrrrvvvvvvvv") // v1 -INST(arm_CMP_reg, "CMP (reg)", "cccc00010101nnnn0000vvvvvrr0mmmm") // v1 -INST(arm_CMP_rsr, "CMP (rsr)", "cccc00010101nnnn0000ssss0rr1mmmm") // v1 -INST(arm_EOR_imm, "EOR (imm)", "cccc0010001Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_EOR_reg, "EOR (reg)", "cccc0000001Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_EOR_rsr, "EOR (rsr)", "cccc0000001Snnnnddddssss0rr1mmmm") // v1 -INST(arm_MOV_imm, "MOV (imm)", "cccc0011101S0000ddddrrrrvvvvvvvv") // v1 -INST(arm_MOV_reg, "MOV (reg)", "cccc0001101S0000ddddvvvvvrr0mmmm") // v1 -INST(arm_MOV_rsr, "MOV (rsr)", "cccc0001101S0000ddddssss0rr1mmmm") // v1 -INST(arm_MVN_imm, "MVN (imm)", "cccc0011111S0000ddddrrrrvvvvvvvv") // v1 -INST(arm_MVN_reg, "MVN (reg)", "cccc0001111S0000ddddvvvvvrr0mmmm") // v1 -INST(arm_MVN_rsr, "MVN (rsr)", "cccc0001111S0000ddddssss0rr1mmmm") // v1 -INST(arm_ORR_imm, "ORR (imm)", "cccc0011100Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_ORR_reg, "ORR (reg)", "cccc0001100Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_ORR_rsr, "ORR (rsr)", "cccc0001100Snnnnddddssss0rr1mmmm") // v1 -INST(arm_RSB_imm, "RSB (imm)", "cccc0010011Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_RSB_reg, "RSB (reg)", "cccc0000011Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_RSB_rsr, "RSB (rsr)", "cccc0000011Snnnnddddssss0rr1mmmm") // v1 -INST(arm_RSC_imm, "RSC (imm)", "cccc0010111Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_RSC_reg, "RSC (reg)", "cccc0000111Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_RSC_rsr, "RSC (rsr)", "cccc0000111Snnnnddddssss0rr1mmmm") // v1 -INST(arm_SBC_imm, "SBC (imm)", "cccc0010110Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_SBC_reg, "SBC (reg)", "cccc0000110Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_SBC_rsr, "SBC (rsr)", "cccc0000110Snnnnddddssss0rr1mmmm") // v1 -INST(arm_SUB_imm, "SUB (imm)", "cccc0010010Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_SUB_reg, "SUB (reg)", "cccc0000010Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_SUB_rsr, "SUB (rsr)", "cccc0000010Snnnnddddssss0rr1mmmm") // v1 -INST(arm_TEQ_imm, "TEQ (imm)", "cccc00110011nnnn0000rrrrvvvvvvvv") // v1 -INST(arm_TEQ_reg, "TEQ (reg)", "cccc00010011nnnn0000vvvvvrr0mmmm") // v1 -INST(arm_TEQ_rsr, "TEQ (rsr)", "cccc00010011nnnn0000ssss0rr1mmmm") // v1 -INST(arm_TST_imm, "TST (imm)", "cccc00110001nnnn0000rrrrvvvvvvvv") // v1 -INST(arm_TST_reg, "TST (reg)", "cccc00010001nnnn0000vvvvvrr0mmmm") // v1 -INST(arm_TST_rsr, "TST (rsr)", "cccc00010001nnnn0000ssss0rr1mmmm") // v1 - -// Exception Generating instructions -INST(arm_BKPT, "BKPT", "cccc00010010vvvvvvvvvvvv0111vvvv") // v5 -INST(arm_SVC, "SVC", "cccc1111vvvvvvvvvvvvvvvvvvvvvvvv") // v1 -INST(arm_UDF, "UDF", "111001111111------------1111----") - -// Extension instructions -INST(arm_SXTB, "SXTB", "cccc011010101111ddddrr000111mmmm") // v6 -INST(arm_SXTB16, "SXTB16", "cccc011010001111ddddrr000111mmmm") // v6 -INST(arm_SXTH, "SXTH", "cccc011010111111ddddrr000111mmmm") // v6 -INST(arm_SXTAB, "SXTAB", "cccc01101010nnnnddddrr000111mmmm") // v6 -INST(arm_SXTAB16, "SXTAB16", "cccc01101000nnnnddddrr000111mmmm") // v6 -INST(arm_SXTAH, "SXTAH", "cccc01101011nnnnddddrr000111mmmm") // v6 -INST(arm_UXTB, "UXTB", "cccc011011101111ddddrr000111mmmm") // v6 -INST(arm_UXTB16, "UXTB16", "cccc011011001111ddddrr000111mmmm") // v6 -INST(arm_UXTH, "UXTH", "cccc011011111111ddddrr000111mmmm") // v6 -INST(arm_UXTAB, "UXTAB", "cccc01101110nnnnddddrr000111mmmm") // v6 -INST(arm_UXTAB16, "UXTAB16", "cccc01101100nnnnddddrr000111mmmm") // v6 -INST(arm_UXTAH, "UXTAH", "cccc01101111nnnnddddrr000111mmmm") // v6 - -// Hint instructions -INST(arm_PLD_imm, "PLD (imm)", "11110101uz01nnnn1111iiiiiiiiiiii") // v5E for PLD; v7 for PLDW -INST(arm_PLD_reg, "PLD (reg)", "11110111uz01nnnn1111iiiiitt0mmmm") // v5E for PLD; v7 for PLDW -INST(arm_SEV, "SEV", "----0011001000001111000000000100") // v6K -INST(arm_SEVL, "SEVL", "----0011001000001111000000000101") // v8 -INST(arm_WFE, "WFE", "----0011001000001111000000000010") // v6K -INST(arm_WFI, "WFI", "----0011001000001111000000000011") // v6K -INST(arm_YIELD, "YIELD", "----0011001000001111000000000001") // v6K -INST(arm_NOP, "Reserved Hint", "----0011001000001111------------") -INST(arm_NOP, "Reserved Hint", "----001100100000111100000000----") - -// Synchronization Primitive instructions -INST(arm_CLREX, "CLREX", "11110101011111111111000000011111") // v6K -INST(arm_SWP, "SWP", "cccc00010000nnnntttt00001001uuuu") // v2S (v6: Deprecated) -INST(arm_SWPB, "SWPB", "cccc00010100nnnntttt00001001uuuu") // v2S (v6: Deprecated) -INST(arm_STL, "STL", "cccc00011000nnnn111111001001tttt") // v8 -INST(arm_STLEX, "STLEX", "cccc00011000nnnndddd11101001tttt") // v8 -INST(arm_STREX, "STREX", "cccc00011000nnnndddd11111001mmmm") // v6 -INST(arm_LDA, "LDA", "cccc00011001nnnndddd110010011111") // v8 -INST(arm_LDAEX, "LDAEX", "cccc00011001nnnndddd111010011111") // v8 -INST(arm_LDREX, "LDREX", "cccc00011001nnnndddd111110011111") // v6 -INST(arm_STLEXD, "STLEXD", "cccc00011010nnnndddd11101001mmmm") // v8 -INST(arm_STREXD, "STREXD", "cccc00011010nnnndddd11111001mmmm") // v6K -INST(arm_LDAEXD, "LDAEXD", "cccc00011011nnnndddd111010011111") // v8 -INST(arm_LDREXD, "LDREXD", "cccc00011011nnnndddd111110011111") // v6K -INST(arm_STLB, "STLB", "cccc00011100nnnn111111001001tttt") // v8 -INST(arm_STLEXB, "STLEXB", "cccc00011100nnnndddd11101001mmmm") // v8 -INST(arm_STREXB, "STREXB", "cccc00011100nnnndddd11111001mmmm") // v6K -INST(arm_LDAB, "LDAB", "cccc00011101nnnndddd110010011111") // v8 -INST(arm_LDAEXB, "LDAEXB", "cccc00011101nnnndddd111010011111") // v8 -INST(arm_LDREXB, "LDREXB", "cccc00011101nnnndddd111110011111") // v6K -INST(arm_STLH, "STLH", "cccc00011110nnnn111111001001mmmm") // v8 -INST(arm_STLEXH, "STLEXH", "cccc00011110nnnndddd11101001mmmm") // v8 -INST(arm_STREXH, "STREXH", "cccc00011110nnnndddd11111001mmmm") // v6K -INST(arm_LDAH, "LDAH", "cccc00011111nnnndddd110010011111") // v8 -INST(arm_LDAEXH, "LDAEXH", "cccc00011111nnnndddd111010011111") // v8 -INST(arm_LDREXH, "LDREXH", "cccc00011111nnnndddd111110011111") // v6K - -// Load/Store instructions -INST(arm_LDRBT, "LDRBT (A1)", "----0100-111--------------------") // v1 -INST(arm_LDRBT, "LDRBT (A2)", "----0110-111---------------0----") // v1 -INST(arm_LDRHT, "LDRHT (A1)", "----0000-111------------1011----") // v6T2 -INST(arm_LDRHT, "LDRHT (A1)", "----0000-1111111--------1011----") // v6T2 -INST(arm_LDRHT, "LDRHT (A2)", "----0000-011--------00001011----") // v6T2 -INST(arm_LDRSBT, "LDRSBT (A1)", "----0000-111------------1101----") // v6T2 -INST(arm_LDRSBT, "LDRSBT (A2)", "----0000-011--------00001101----") // v6T2 -INST(arm_LDRSHT, "LDRSHT (A1)", "----0000-111------------1111----") // v6T2 -INST(arm_LDRSHT, "LDRSHT (A2)", "----0000-011--------00001111----") // v6T2 -INST(arm_LDRT, "LDRT (A1)", "----0100-011--------------------") // v1 -INST(arm_LDRT, "LDRT (A2)", "----0110-011---------------0----") // v1 -INST(arm_STRBT, "STRBT (A1)", "----0100-110--------------------") // v1 -INST(arm_STRBT, "STRBT (A2)", "----0110-110---------------0----") // v1 -INST(arm_STRHT, "STRHT (A1)", "----0000-110------------1011----") // v6T2 -INST(arm_STRHT, "STRHT (A2)", "----0000-010--------00001011----") // v6T2 -INST(arm_STRT, "STRT (A1)", "----0100-010--------------------") // v1 -INST(arm_STRT, "STRT (A2)", "----0110-010---------------0----") // v1 -INST(arm_LDR_lit, "LDR (lit)", "cccc0101u0011111ttttvvvvvvvvvvvv") // v1 -INST(arm_LDR_imm, "LDR (imm)", "cccc010pu0w1nnnnttttvvvvvvvvvvvv") // v1 -INST(arm_LDR_reg, "LDR (reg)", "cccc011pu0w1nnnnttttvvvvvrr0mmmm") // v1 -INST(arm_LDRB_lit, "LDRB (lit)", "cccc0101u1011111ttttvvvvvvvvvvvv") // v1 -INST(arm_LDRB_imm, "LDRB (imm)", "cccc010pu1w1nnnnttttvvvvvvvvvvvv") // v1 -INST(arm_LDRB_reg, "LDRB (reg)", "cccc011pu1w1nnnnttttvvvvvrr0mmmm") // v1 -INST(arm_LDRD_lit, "LDRD (lit)", "cccc0001u1001111ttttvvvv1101vvvv") // v5E -INST(arm_LDRD_imm, "LDRD (imm)", "cccc000pu1w0nnnnttttvvvv1101vvvv") // v5E -INST(arm_LDRD_reg, "LDRD (reg)", "cccc000pu0w0nnnntttt00001101mmmm") // v5E -INST(arm_LDRH_lit, "LDRH (lit)", "cccc000pu1w11111ttttvvvv1011vvvv") // v4 -INST(arm_LDRH_imm, "LDRH (imm)", "cccc000pu1w1nnnnttttvvvv1011vvvv") // v4 -INST(arm_LDRH_reg, "LDRH (reg)", "cccc000pu0w1nnnntttt00001011mmmm") // v4 -INST(arm_LDRSB_lit, "LDRSB (lit)", "cccc0001u1011111ttttvvvv1101vvvv") // v4 -INST(arm_LDRSB_imm, "LDRSB (imm)", "cccc000pu1w1nnnnttttvvvv1101vvvv") // v4 -INST(arm_LDRSB_reg, "LDRSB (reg)", "cccc000pu0w1nnnntttt00001101mmmm") // v4 -INST(arm_LDRSH_lit, "LDRSH (lit)", "cccc0001u1011111ttttvvvv1111vvvv") // v4 -INST(arm_LDRSH_imm, "LDRSH (imm)", "cccc000pu1w1nnnnttttvvvv1111vvvv") // v4 -INST(arm_LDRSH_reg, "LDRSH (reg)", "cccc000pu0w1nnnntttt00001111mmmm") // v4 -INST(arm_STR_imm, "STR (imm)", "cccc010pu0w0nnnnttttvvvvvvvvvvvv") // v1 -INST(arm_STR_reg, "STR (reg)", "cccc011pu0w0nnnnttttvvvvvrr0mmmm") // v1 -INST(arm_STRB_imm, "STRB (imm)", "cccc010pu1w0nnnnttttvvvvvvvvvvvv") // v1 -INST(arm_STRB_reg, "STRB (reg)", "cccc011pu1w0nnnnttttvvvvvrr0mmmm") // v1 -INST(arm_STRD_imm, "STRD (imm)", "cccc000pu1w0nnnnttttvvvv1111vvvv") // v5E -INST(arm_STRD_reg, "STRD (reg)", "cccc000pu0w0nnnntttt00001111mmmm") // v5E -INST(arm_STRH_imm, "STRH (imm)", "cccc000pu1w0nnnnttttvvvv1011vvvv") // v4 -INST(arm_STRH_reg, "STRH (reg)", "cccc000pu0w0nnnntttt00001011mmmm") // v4 - -// Load/Store Multiple instructions -INST(arm_LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_LDM_usr, "LDM (usr reg)", "----100--101--------------------") // v1 -INST(arm_LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") // v1 -INST(arm_STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_STMDB, "STMDB", "cccc100100w0nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_STMIB, "STMIB", "cccc100110w0nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_STM_usr, "STM (usr reg)", "----100--100--------------------") // v1 - -// Miscellaneous instructions -INST(arm_BFC, "BFC", "cccc0111110vvvvvddddvvvvv0011111") // v6T2 -INST(arm_BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") // v6T2 -INST(arm_CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") // v5 -INST(arm_MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2 -INST(arm_MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2 -INST(arm_NOP, "NOP", "----0011001000001111000000000000") // v6K -INST(arm_SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2 -INST(arm_SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") // v6 -INST(arm_UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2 - -// Unsigned Sum of Absolute Differences instructions -INST(arm_USAD8, "USAD8", "cccc01111000dddd1111mmmm0001nnnn") // v6 -INST(arm_USADA8, "USADA8", "cccc01111000ddddaaaammmm0001nnnn") // v6 - -// Packing instructions -INST(arm_PKHBT, "PKHBT", "cccc01101000nnnnddddvvvvv001mmmm") // v6K -INST(arm_PKHTB, "PKHTB", "cccc01101000nnnnddddvvvvv101mmmm") // v6K - -// Reversal instructions -INST(arm_RBIT, "RBIT", "cccc011011111111dddd11110011mmmm") // v6T2 -INST(arm_REV, "REV", "cccc011010111111dddd11110011mmmm") // v6 -INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm") // v6 -INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") // v6 - -// Saturation instructions -INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6 -INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6 -INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6 -INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6 - -// Divide instructions -INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") // v7a -INST(arm_UDIV, "UDIV", "cccc01110011dddd1111mmmm0001nnnn") // v7a - -// Multiply (Normal) instructions -INST(arm_MLA, "MLA", "cccc0000001Sddddaaaammmm1001nnnn") // v2 -INST(arm_MLS, "MLS", "cccc00000110ddddaaaammmm1001nnnn") // v6T2 -INST(arm_MUL, "MUL", "cccc0000000Sdddd0000mmmm1001nnnn") // v2 - -// Multiply (Long) instructions -INST(arm_SMLAL, "SMLAL", "cccc0000111Sddddaaaammmm1001nnnn") // v3M -INST(arm_SMULL, "SMULL", "cccc0000110Sddddaaaammmm1001nnnn") // v3M -INST(arm_UMAAL, "UMAAL", "cccc00000100ddddaaaammmm1001nnnn") // v6 -INST(arm_UMLAL, "UMLAL", "cccc0000101Sddddaaaammmm1001nnnn") // v3M -INST(arm_UMULL, "UMULL", "cccc0000100Sddddaaaammmm1001nnnn") // v3M - -// Multiply (Halfword) instructions -INST(arm_SMLALxy, "SMLALXY", "cccc00010100ddddaaaammmm1xy0nnnn") // v5xP -INST(arm_SMLAxy, "SMLAXY", "cccc00010000ddddaaaammmm1xy0nnnn") // v5xP -INST(arm_SMULxy, "SMULXY", "cccc00010110dddd0000mmmm1xy0nnnn") // v5xP - -// Multiply (Word by Halfword) instructions -INST(arm_SMLAWy, "SMLAWY", "cccc00010010ddddaaaammmm1y00nnnn") // v5xP -INST(arm_SMULWy, "SMULWY", "cccc00010010dddd0000mmmm1y10nnnn") // v5xP - -// Multiply (Most Significant Word) instructions -INST(arm_SMMUL, "SMMUL", "cccc01110101dddd1111mmmm00R1nnnn") // v6 -INST(arm_SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") // v6 -INST(arm_SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") // v6 - -// Multiply (Dual) instructions -INST(arm_SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") // v6 -INST(arm_SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") // v6 -INST(arm_SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") // v6 -INST(arm_SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") // v6 -INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") // v6 -INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6 - -// Parallel Add/Subtract (Modulo) instructions -INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6 -INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") // v6 -INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") // v6 -INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6 -INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6 -INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6 -INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6 -INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6 -INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6 -INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6 -INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6 -INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6 - -// Parallel Add/Subtract (Saturating) instructions -INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6 -INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6 -INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6 -INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6 -INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6 -INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6 -INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6 -INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6 -INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6 -INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6 -INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6 -INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6 - -// Parallel Add/Subtract (Halving) instructions -INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6 -INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6 -INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") // v6 -INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") // v6 -INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6 -INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6 -INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6 -INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6 -INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") // v6 -INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") // v6 -INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6 -INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6 - -// Saturated Add/Subtract instructions -INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP -INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") // v5xP -INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP -INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP - -// Status Register Access instructions -INST(arm_CPS, "CPS", "111100010000---00000000---0-----") // v6 -INST(arm_SETEND, "SETEND", "1111000100000001000000e000000000") // v6 -INST(arm_MRS, "MRS", "cccc000100001111dddd000000000000") // v3 -INST(arm_MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") // v3 -INST(arm_MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") // v3 -INST(arm_RFE, "RFE", "1111100--0-1----0000101000000000") // v6 -INST(arm_SRS, "SRS", "1111100--1-0110100000101000-----") // v6 +INST(arm_CLREX, "CLREX", "11110101011111111111000000011111") +INST(arm_SETEND, "SETEND", "1111000100000001000000e000000000") +INST(arm_DMB, "DMB", "1111010101111111111100000101oooo") +INST(arm_DSB, "DSB", "1111010101111111111100000100oooo") +INST(arm_ISB, "ISB", "1111010101111111111100000110oooo") +INST(arm_SEV, "SEV", "----0011001000001111000000000100") +INST(arm_SEVL, "SEVL", "----0011001000001111000000000101") +INST(arm_WFE, "WFE", "----0011001000001111000000000010") +INST(arm_WFI, "WFI", "----0011001000001111000000000011") +INST(arm_YIELD, "YIELD", "----0011001000001111000000000001") +INST(arm_NOP, "NOP", "----0011001000001111000000000000") +INST(arm_RFE, "RFE", "1111100--0-1----0000101000000000") +INST(arm_BLX_reg, "BLX (reg)", "cccc000100101111111111110011mmmm") +INST(arm_BX, "BX", "cccc000100101111111111110001mmmm") +INST(arm_BXJ, "BXJ", "cccc000100101111111111110010mmmm") +INST(arm_NOP, "Reserved Hint", "----001100100000111100000000----") +INST(arm_MRS, "MRS", "cccc000100001111dddd000000000000") +INST(arm_SRS, "SRS", "1111100--1-0110100000101000-----") +INST(arm_CPS, "CPS", "111100010000---00000000---0-----") +INST(arm_STL, "STL", "cccc00011000nnnn111111001001tttt") +INST(arm_LDA, "LDA", "cccc00011001nnnndddd110010011111") +INST(arm_LDAEX, "LDAEX", "cccc00011001nnnndddd111010011111") +INST(arm_LDREX, "LDREX", "cccc00011001nnnndddd111110011111") +INST(arm_LDAEXD, "LDAEXD", "cccc00011011nnnndddd111010011111") +INST(arm_LDREXD, "LDREXD", "cccc00011011nnnndddd111110011111") +INST(arm_STLB, "STLB", "cccc00011100nnnn111111001001tttt") +INST(arm_LDAB, "LDAB", "cccc00011101nnnndddd110010011111") +INST(arm_LDAEXB, "LDAEXB", "cccc00011101nnnndddd111010011111") +INST(arm_LDREXB, "LDREXB", "cccc00011101nnnndddd111110011111") +INST(arm_STLH, "STLH", "cccc00011110nnnn111111001001mmmm") +INST(arm_LDAH, "LDAH", "cccc00011111nnnndddd110010011111") +INST(arm_LDAEXH, "LDAEXH", "cccc00011111nnnndddd111010011111") +INST(arm_LDREXH, "LDREXH", "cccc00011111nnnndddd111110011111") +INST(arm_CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") +INST(arm_RBIT, "RBIT", "cccc011011111111dddd11110011mmmm") +INST(arm_REV, "REV", "cccc011010111111dddd11110011mmmm") +INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm") +INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") +INST(arm_MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") +INST(arm_SXTB, "SXTB", "cccc011010101111ddddrr000111mmmm") +INST(arm_SXTB16, "SXTB16", "cccc011010001111ddddrr000111mmmm") +INST(arm_SXTH, "SXTH", "cccc011010111111ddddrr000111mmmm") +INST(arm_UXTB, "UXTB", "cccc011011101111ddddrr000111mmmm") +INST(arm_UXTB16, "UXTB16", "cccc011011001111ddddrr000111mmmm") +INST(arm_UXTH, "UXTH", "cccc011011111111ddddrr000111mmmm") +INST(arm_UDF, "UDF", "111001111111------------1111----") +INST(arm_NOP, "Reserved Hint", "----0011001000001111------------") +INST(arm_SWP, "SWP", "cccc00010000nnnntttt00001001uuuu") +INST(arm_SWPB, "SWPB", "cccc00010100nnnntttt00001001uuuu") +INST(arm_STLEX, "STLEX", "cccc00011000nnnndddd11101001tttt") +INST(arm_STREX, "STREX", "cccc00011000nnnndddd11111001mmmm") +INST(arm_STLEXD, "STLEXD", "cccc00011010nnnndddd11101001mmmm") +INST(arm_STREXD, "STREXD", "cccc00011010nnnndddd11111001mmmm") +INST(arm_STLEXB, "STLEXB", "cccc00011100nnnndddd11101001mmmm") +INST(arm_STREXB, "STREXB", "cccc00011100nnnndddd11111001mmmm") +INST(arm_STLEXH, "STLEXH", "cccc00011110nnnndddd11101001mmmm") +INST(arm_STREXH, "STREXH", "cccc00011110nnnndddd11111001mmmm") +INST(arm_SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") +INST(arm_USAD8, "USAD8", "cccc01111000dddd1111mmmm0001nnnn") +INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") +INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") +INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") +INST(arm_UDIV, "UDIV", "cccc01110011dddd1111mmmm0001nnnn") +INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") +INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") +INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") +INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") +INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") +INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") +INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") +INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") +INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") +INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") +INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") +INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") +INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") +INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") +INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") +INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") +INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") +INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") +INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") +INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") +INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") +INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") +INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") +INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") +INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") +INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") +INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") +INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") +INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") +INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") +INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") +INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") +INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") +INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") +INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") +INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") +INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") +INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") +INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") +INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") +INST(arm_PLD_reg, "PLD (reg)", "11110111uz01nnnn1111iiiiitt0mmmm") +INST(arm_LDRHT, "LDRHT (A1)", "----0000-1111111--------1011----") +INST(arm_LDRHT, "LDRHT (A2)", "----0000-011--------00001011----") +INST(arm_LDRSBT, "LDRSBT (A2)", "----0000-011--------00001101----") +INST(arm_LDRSHT, "LDRSHT (A2)", "----0000-011--------00001111----") +INST(arm_STRHT, "STRHT (A2)", "----0000-010--------00001011----") +INST(arm_LDRD_lit, "LDRD (lit)", "cccc0001u1001111ttttvvvv1101vvvv") +INST(arm_LDRSB_lit, "LDRSB (lit)", "cccc0001u1011111ttttvvvv1101vvvv") +INST(arm_LDRSH_lit, "LDRSH (lit)", "cccc0001u1011111ttttvvvv1111vvvv") +INST(arm_MUL, "MUL", "cccc0000000Sdddd0000mmmm1001nnnn") +INST(arm_SMULWy, "SMULWY", "cccc00010010dddd0000mmmm1y10nnnn") +INST(arm_SMMUL, "SMMUL", "cccc01110101dddd1111mmmm00R1nnnn") +INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") +INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") +INST(arm_CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") +INST(arm_CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") +INST(arm_CMN_rsr, "CMN (rsr)", "cccc00010111nnnn0000ssss0rr1mmmm") +INST(arm_CMP_rsr, "CMP (rsr)", "cccc00010101nnnn0000ssss0rr1mmmm") +INST(arm_TEQ_rsr, "TEQ (rsr)", "cccc00010011nnnn0000ssss0rr1mmmm") +INST(arm_TST_rsr, "TST (rsr)", "cccc00010001nnnn0000ssss0rr1mmmm") +INST(arm_SXTAB, "SXTAB", "cccc01101010nnnnddddrr000111mmmm") +INST(arm_SXTAB16, "SXTAB16", "cccc01101000nnnnddddrr000111mmmm") +INST(arm_SXTAH, "SXTAH", "cccc01101011nnnnddddrr000111mmmm") +INST(arm_UXTAB, "UXTAB", "cccc01101110nnnnddddrr000111mmmm") +INST(arm_UXTAB16, "UXTAB16", "cccc01101100nnnnddddrr000111mmmm") +INST(arm_UXTAH, "UXTAH", "cccc01101111nnnnddddrr000111mmmm") +INST(arm_PLD_imm, "PLD (imm)", "11110101uz01nnnn1111iiiiiiiiiiii") +INST(arm_BFC, "BFC", "cccc0111110vvvvvddddvvvvv0011111") +INST(arm_SMULxy, "SMULXY", "cccc00010110dddd0000mmmm1xy0nnnn") +INST(arm_CMN_reg, "CMN (reg)", "cccc00010111nnnn0000vvvvvrr0mmmm") +INST(arm_CMP_reg, "CMP (reg)", "cccc00010101nnnn0000vvvvvrr0mmmm") +INST(arm_MOV_rsr, "MOV (rsr)", "cccc0001101S0000ddddssss0rr1mmmm") +INST(arm_MVN_rsr, "MVN (rsr)", "cccc0001111S0000ddddssss0rr1mmmm") +INST(arm_TEQ_reg, "TEQ (reg)", "cccc00010011nnnn0000vvvvvrr0mmmm") +INST(arm_TST_reg, "TST (reg)", "cccc00010001nnnn0000vvvvvrr0mmmm") +INST(arm_LDRD_reg, "LDRD (reg)", "cccc000pu0w0nnnntttt00001101mmmm") +INST(arm_LDRH_lit, "LDRH (lit)", "cccc000pu1w11111ttttvvvv1011vvvv") +INST(arm_LDRH_reg, "LDRH (reg)", "cccc000pu0w1nnnntttt00001011mmmm") +INST(arm_LDRSB_reg, "LDRSB (reg)", "cccc000pu0w1nnnntttt00001101mmmm") +INST(arm_LDRSH_reg, "LDRSH (reg)", "cccc000pu0w1nnnntttt00001111mmmm") +INST(arm_STRD_reg, "STRD (reg)", "cccc000pu0w0nnnntttt00001111mmmm") +INST(arm_STRH_reg, "STRH (reg)", "cccc000pu0w0nnnntttt00001011mmmm") +INST(arm_CMN_imm, "CMN (imm)", "cccc00110111nnnn0000rrrrvvvvvvvv") +INST(arm_CMP_imm, "CMP (imm)", "cccc00110101nnnn0000rrrrvvvvvvvv") +INST(arm_MOV_reg, "MOV (reg)", "cccc0001101S0000ddddvvvvvrr0mmmm") +INST(arm_MVN_reg, "MVN (reg)", "cccc0001111S0000ddddvvvvvrr0mmmm") +INST(arm_TEQ_imm, "TEQ (imm)", "cccc00110011nnnn0000rrrrvvvvvvvv") +INST(arm_TST_imm, "TST (imm)", "cccc00110001nnnn0000rrrrvvvvvvvv") +INST(arm_BKPT, "BKPT", "cccc00010010vvvvvvvvvvvv0111vvvv") +INST(arm_USADA8, "USADA8", "cccc01111000ddddaaaammmm0001nnnn") +INST(arm_MLS, "MLS", "cccc00000110ddddaaaammmm1001nnnn") +INST(arm_UMAAL, "UMAAL", "cccc00000100ddddaaaammmm1001nnnn") +INST(arm_MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") +INST(arm_MOV_imm, "MOV (imm)", "cccc0011101S0000ddddrrrrvvvvvvvv") +INST(arm_MVN_imm, "MVN (imm)", "cccc0011111S0000ddddrrrrvvvvvvvv") +INST(arm_LDRHT, "LDRHT (A1)", "----0000-111------------1011----") +INST(arm_LDRSBT, "LDRSBT (A1)", "----0000-111------------1101----") +INST(arm_LDRSHT, "LDRSHT (A1)", "----0000-111------------1111----") +INST(arm_STRHT, "STRHT (A1)", "----0000-110------------1011----") +INST(arm_LDR_lit, "LDR (lit)", "cccc0101u0011111ttttvvvvvvvvvvvv") +INST(arm_LDRB_lit, "LDRB (lit)", "cccc0101u1011111ttttvvvvvvvvvvvv") +INST(arm_PKHBT, "PKHBT", "cccc01101000nnnnddddvvvvv001mmmm") +INST(arm_PKHTB, "PKHTB", "cccc01101000nnnnddddvvvvv101mmmm") +INST(arm_MLA, "MLA", "cccc0000001Sddddaaaammmm1001nnnn") +INST(arm_SMLAL, "SMLAL", "cccc0000111Sddddaaaammmm1001nnnn") +INST(arm_SMULL, "SMULL", "cccc0000110Sddddaaaammmm1001nnnn") +INST(arm_UMLAL, "UMLAL", "cccc0000101Sddddaaaammmm1001nnnn") +INST(arm_UMULL, "UMULL", "cccc0000100Sddddaaaammmm1001nnnn") +INST(arm_SMLAWy, "SMLAWY", "cccc00010010ddddaaaammmm1y00nnnn") +INST(arm_SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") +INST(arm_SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") +INST(arm_SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") +INST(arm_SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") +INST(arm_SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") +INST(arm_SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") +INST(arm_BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") +INST(arm_SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") +INST(arm_UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") +INST(arm_SMLALxy, "SMLALXY", "cccc00010100ddddaaaammmm1xy0nnnn") +INST(arm_SMLAxy, "SMLAXY", "cccc00010000ddddaaaammmm1xy0nnnn") +INST(arm_ADC_rsr, "ADC (rsr)", "cccc0000101Snnnnddddssss0rr1mmmm") +INST(arm_ADD_rsr, "ADD (rsr)", "cccc0000100Snnnnddddssss0rr1mmmm") +INST(arm_AND_rsr, "AND (rsr)", "cccc0000000Snnnnddddssss0rr1mmmm") +INST(arm_BIC_rsr, "BIC (rsr)", "cccc0001110Snnnnddddssss0rr1mmmm") +INST(arm_EOR_rsr, "EOR (rsr)", "cccc0000001Snnnnddddssss0rr1mmmm") +INST(arm_ORR_rsr, "ORR (rsr)", "cccc0001100Snnnnddddssss0rr1mmmm") +INST(arm_RSB_rsr, "RSB (rsr)", "cccc0000011Snnnnddddssss0rr1mmmm") +INST(arm_RSC_rsr, "RSC (rsr)", "cccc0000111Snnnnddddssss0rr1mmmm") +INST(arm_SBC_rsr, "SBC (rsr)", "cccc0000110Snnnnddddssss0rr1mmmm") +INST(arm_SUB_rsr, "SUB (rsr)", "cccc0000010Snnnnddddssss0rr1mmmm") +INST(arm_LDRD_imm, "LDRD (imm)", "cccc000pu1w0nnnnttttvvvv1101vvvv") +INST(arm_LDRH_imm, "LDRH (imm)", "cccc000pu1w1nnnnttttvvvv1011vvvv") +INST(arm_LDRSB_imm, "LDRSB (imm)", "cccc000pu1w1nnnnttttvvvv1101vvvv") +INST(arm_LDRSH_imm, "LDRSH (imm)", "cccc000pu1w1nnnnttttvvvv1111vvvv") +INST(arm_STRD_imm, "STRD (imm)", "cccc000pu1w0nnnnttttvvvv1111vvvv") +INST(arm_STRH_imm, "STRH (imm)", "cccc000pu1w0nnnnttttvvvv1011vvvv") +INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") +INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") +INST(arm_MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") +INST(arm_MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") +INST(arm_ADC_reg, "ADC (reg)", "cccc0000101Snnnnddddvvvvvrr0mmmm") +INST(arm_ADD_reg, "ADD (reg)", "cccc0000100Snnnnddddvvvvvrr0mmmm") +INST(arm_AND_reg, "AND (reg)", "cccc0000000Snnnnddddvvvvvrr0mmmm") +INST(arm_BIC_reg, "BIC (reg)", "cccc0001110Snnnnddddvvvvvrr0mmmm") +INST(arm_EOR_reg, "EOR (reg)", "cccc0000001Snnnnddddvvvvvrr0mmmm") +INST(arm_ORR_reg, "ORR (reg)", "cccc0001100Snnnnddddvvvvvrr0mmmm") +INST(arm_RSB_reg, "RSB (reg)", "cccc0000011Snnnnddddvvvvvrr0mmmm") +INST(arm_RSC_reg, "RSC (reg)", "cccc0000111Snnnnddddvvvvvrr0mmmm") +INST(arm_SBC_reg, "SBC (reg)", "cccc0000110Snnnnddddvvvvvrr0mmmm") +INST(arm_SUB_reg, "SUB (reg)", "cccc0000010Snnnnddddvvvvvrr0mmmm") +INST(arm_LDRBT, "LDRBT (A2)", "----0110-111---------------0----") +INST(arm_LDRT, "LDRT (A2)", "----0110-011---------------0----") +INST(arm_STRBT, "STRBT (A2)", "----0110-110---------------0----") +INST(arm_STRT, "STRT (A2)", "----0110-010---------------0----") +INST(arm_MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") +INST(arm_MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") +INST(arm_BLX_imm, "BLX (imm)", "1111101hvvvvvvvvvvvvvvvvvvvvvvvv") +INST(arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv") +INST(arm_ADD_imm, "ADD (imm)", "cccc0010100Snnnnddddrrrrvvvvvvvv") +INST(arm_AND_imm, "AND (imm)", "cccc0010000Snnnnddddrrrrvvvvvvvv") +INST(arm_BIC_imm, "BIC (imm)", "cccc0011110Snnnnddddrrrrvvvvvvvv") +INST(arm_EOR_imm, "EOR (imm)", "cccc0010001Snnnnddddrrrrvvvvvvvv") +INST(arm_ORR_imm, "ORR (imm)", "cccc0011100Snnnnddddrrrrvvvvvvvv") +INST(arm_RSB_imm, "RSB (imm)", "cccc0010011Snnnnddddrrrrvvvvvvvv") +INST(arm_RSC_imm, "RSC (imm)", "cccc0010111Snnnnddddrrrrvvvvvvvv") +INST(arm_SBC_imm, "SBC (imm)", "cccc0010110Snnnnddddrrrrvvvvvvvv") +INST(arm_SUB_imm, "SUB (imm)", "cccc0010010Snnnnddddrrrrvvvvvvvv") +INST(arm_LDRBT, "LDRBT (A1)", "----0100-111--------------------") +INST(arm_LDRT, "LDRT (A1)", "----0100-011--------------------") +INST(arm_STRBT, "STRBT (A1)", "----0100-110--------------------") +INST(arm_STRT, "STRT (A1)", "----0100-010--------------------") +INST(arm_LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") +INST(arm_LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") +INST(arm_LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") +INST(arm_LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") +INST(arm_STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") +INST(arm_STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") +INST(arm_STMDB, "STMDB", "cccc100100w0nnnnxxxxxxxxxxxxxxxx") +INST(arm_STMIB, "STMIB", "cccc100110w0nnnnxxxxxxxxxxxxxxxx") +INST(arm_MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") +INST(arm_MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") +INST(arm_LDR_reg, "LDR (reg)", "cccc011pu0w1nnnnttttvvvvvrr0mmmm") +INST(arm_LDRB_reg, "LDRB (reg)", "cccc011pu1w1nnnnttttvvvvvrr0mmmm") +INST(arm_STR_reg, "STR (reg)", "cccc011pu0w0nnnnttttvvvvvrr0mmmm") +INST(arm_STRB_reg, "STRB (reg)", "cccc011pu1w0nnnnttttvvvvvrr0mmmm") +INST(arm_LDM_usr, "LDM (usr reg)", "----100--101--------------------") +INST(arm_LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") +INST(arm_STM_usr, "STM (usr reg)", "----100--100--------------------") +INST(arm_CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") +INST(arm_LDR_imm, "LDR (imm)", "cccc010pu0w1nnnnttttvvvvvvvvvvvv") +INST(arm_LDRB_imm, "LDRB (imm)", "cccc010pu1w1nnnnttttvvvvvvvvvvvv") +INST(arm_STR_imm, "STR (imm)", "cccc010pu0w0nnnnttttvvvvvvvvvvvv") +INST(arm_STRB_imm, "STRB (imm)", "cccc010pu1w0nnnnttttvvvvvvvvvvvv") +INST(arm_B, "B", "cccc1010vvvvvvvvvvvvvvvvvvvvvvvv") +INST(arm_BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") +INST(arm_LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") +INST(arm_STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") +INST(arm_SVC, "SVC", "cccc1111vvvvvvvvvvvvvvvvvvvvvvvv") \ No newline at end of file diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 6bf52b87e6..2861b998ca 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -15,7 +15,7 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/frontend/decoder/decoder_detail.h" @@ -27,50 +27,12 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() noexcept { - std::vector>> table = { -#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +static std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = std::array{ +#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./asimd.inc" #undef INST }; - // Exceptions to the rule of thumb. - const std::set comes_first{ - "VBIC, VMOV, VMVN, VORR (immediate)", - "VEXT", - "VTBL", - "VTBX", - "VDUP (scalar)", - }; - const std::set comes_last{ - "VMLA (scalar)", - "VMLAL (scalar)", - "VQDMLAL/VQDMLSL (scalar)", - "VMUL (scalar)", - "VMULL (scalar)", - "VQDMULL (scalar)", - "VQDMULH (scalar)", - "VQRDMULH (scalar)", - }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { - return comes_first.count(e.first) > 0; - }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { - return comes_last.count(e.first) == 0; - }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { - return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); - }); - std::vector> final_table; - std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { - return e.second; - }); - return final_table; -} - -template -std::optional>> DecodeASIMD(u32 instruction) noexcept { - alignas(64) static const auto table = GetASIMDDecodeTable(); auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { return matcher.Matches(instruction); }); @@ -78,7 +40,7 @@ std::optional>> DecodeASIMD(u32 ins } template -std::optional GetNameASIMD(u32 inst) noexcept { +static std::optional GetNameASIMD(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.inc b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.inc index d0dfd86752..def1a17866 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.inc +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.inc @@ -1,172 +1,151 @@ -// Three registers of the same length -INST(asimd_VHADD, "VHADD", "1111001U0Dzznnnndddd0000NQM0mmmm") // ASIMD -INST(asimd_VQADD, "VQADD", "1111001U0Dzznnnndddd0000NQM1mmmm") // ASIMD -INST(asimd_VRHADD, "VRHADD", "1111001U0Dzznnnndddd0001NQM0mmmm") // ASIMD -INST(asimd_VAND_reg, "VAND (register)", "111100100D00nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VBIC_reg, "VBIC (register)", "111100100D01nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VORR_reg, "VORR (register)", "111100100D10nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VORN_reg, "VORN (register)", "111100100D11nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VEOR_reg, "VEOR (register)", "111100110D00nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VBSL, "VBSL", "111100110D01nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VBIT, "VBIT", "111100110D10nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VBIF, "VBIF", "111100110D11nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VHSUB, "VHSUB", "1111001U0Dzznnnndddd0010NQM0mmmm") // ASIMD -INST(asimd_VQSUB, "VQSUB", "1111001U0Dzznnnndddd0010NQM1mmmm") // ASIMD -INST(asimd_VCGT_reg, "VCGT (register)", "1111001U0Dzznnnndddd0011NQM0mmmm") // ASIMD -INST(asimd_VCGE_reg, "VCGE (register)", "1111001U0Dzznnnndddd0011NQM1mmmm") // ASIMD -INST(asimd_VSHL_reg, "VSHL (register)", "1111001U0Dzznnnndddd0100NQM0mmmm") // ASIMD -INST(asimd_VQSHL_reg, "VQSHL (register)", "1111001U0Dzznnnndddd0100NQM1mmmm") // ASIMD -INST(asimd_VRSHL, "VRSHL", "1111001U0Dzznnnndddd0101NQM0mmmm") // ASIMD -//INST(asimd_VQRSHL, "VQRSHL", "1111001U0-CC--------0101---1----") // ASIMD -INST(asimd_VMAX, "VMAX/VMIN (integer)", "1111001U0Dzznnnnmmmm0110NQMommmm") // ASIMD -INST(asimd_VABD, "VABD", "1111001U0Dzznnnndddd0111NQM0mmmm") // ASIMD -INST(asimd_VABA, "VABA", "1111001U0Dzznnnndddd0111NQM1mmmm") // ASIMD -INST(asimd_VADD_int, "VADD (integer)", "111100100Dzznnnndddd1000NQM0mmmm") // ASIMD -INST(asimd_VSUB_int, "VSUB (integer)", "111100110Dzznnnndddd1000NQM0mmmm") // ASIMD -INST(asimd_VTST, "VTST", "111100100Dzznnnndddd1000NQM1mmmm") // ASIMD -INST(asimd_VCEQ_reg, "VCEG (register)", "111100110Dzznnnndddd1000NQM1mmmm") // ASIMD -INST(asimd_VMLA, "VMLA/VMLS", "1111001o0Dzznnnndddd1001NQM0mmmm") // ASIMD -INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd1001NQM1mmmm") // ASIMD -INST(asimd_VPMAX_int, "VPMAX/VPMIN (integer)", "1111001U0Dzznnnndddd1010NQMommmm") // ASIMD -INST(v8_VMAXNM, "VMAXNM", "111100110D0znnnndddd1111NQM1mmmm") // v8 -INST(v8_VMINNM, "VMINNM", "111100110D1znnnndddd1111NQM1mmmm") // v8 -INST(asimd_VQDMULH, "VQDMULH", "111100100Dzznnnndddd1011NQM0mmmm") // ASIMD -INST(asimd_VQRDMULH, "VQRDMULH", "111100110Dzznnnndddd1011NQM0mmmm") // ASIMD -INST(asimd_VPADD, "VPADD", "111100100Dzznnnndddd1011NQM1mmmm") // ASIMD -INST(asimd_VFMA, "VFMA", "111100100D0znnnndddd1100NQM1mmmm") // ASIMD -INST(asimd_VFMS, "VFMS", "111100100D1znnnndddd1100NQM1mmmm") // ASIMD -INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD -INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD -INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") // ASIMD -INST(asimd_VABD_float, "VABD (floating-point)", "111100110D1znnnndddd1101NQM0mmmm") // ASIMD -INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100D0znnnndddd1101NQM1mmmm") // ASIMD -INST(asimd_VMLS_float, "VMLS (floating-point)", "111100100D1znnnndddd1101NQM1mmmm") // ASIMD -INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD -INST(asimd_VCEQ_reg_float, "VCEQ (register)", "111100100D0znnnndddd1110NQM0mmmm") // ASIMD -INST(asimd_VCGE_reg_float, "VCGE (register)", "111100110D0znnnndddd1110NQM0mmmm") // ASIMD -INST(asimd_VCGT_reg_float, "VCGT (register)", "111100110D1znnnndddd1110NQM0mmmm") // ASIMD -INST(asimd_VACGE, "VACGE", "111100110Doznnnndddd1110NQM1mmmm") // ASIMD -INST(asimd_VMAX_float, "VMAX (floating-point)", "111100100D0znnnndddd1111NQM0mmmm") // ASIMD -INST(asimd_VMIN_float, "VMIN (floating-point)", "111100100D1znnnndddd1111NQM0mmmm") // ASIMD -INST(asimd_VPMAX_float, "VPMAX (floating-point)", "111100110D0znnnndddd1111NQM0mmmm") // ASIMD -INST(asimd_VPMIN_float, "VPMIN (floating-point)", "111100110D1znnnndddd1111NQM0mmmm") // ASIMD -INST(asimd_VRECPS, "VRECPS", "111100100D0znnnndddd1111NQM1mmmm") // ASIMD -INST(asimd_VRSQRTS, "VRSQRTS", "111100100D1znnnndddd1111NQM1mmmm") // ASIMD -INST(v8_SHA256H, "SHA256H", "111100110D00nnnndddd1100NQM0mmmm") // v8 -INST(v8_SHA256H2, "SHA256H2", "111100110D01nnnndddd1100NQM0mmmm") // v8 -INST(v8_SHA256SU1, "SHA256SU1", "111100110D10nnnndddd1100NQM0mmmm") // v8 +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// DO NOT REORDER -// Three registers of different lengths -INST(asimd_VADDL, "VADDL/VADDW", "1111001U1Dzznnnndddd000oN0M0mmmm") // ASIMD -INST(asimd_VSUBL, "VSUBL/VSUBW", "1111001U1Dzznnnndddd001oN0M0mmmm") // ASIMD -//INST(asimd_VADDHN, "VADDHN", "111100101-----------0100-0-0----") // ASIMD -//INST(asimd_VRADDHN, "VRADDHN", "111100111-----------0100-0-0----") // ASIMD -INST(asimd_VABAL, "VABAL", "1111001U1Dzznnnndddd0101N0M0mmmm") // ASIMD -//INST(asimd_VSUBHN, "VSUBHN", "111100101-----------0110-0-0----") // ASIMD -//INST(asimd_VRSUBHN, "VRSUBHN", "111100111-----------0110-0-0----") // ASIMD -INST(asimd_VABDL, "VABDL", "1111001U1Dzznnnndddd0111N0M0mmmm") // ASIMD -INST(asimd_VMLAL, "VMLAL/VMLSL", "1111001U1Dzznnnndddd10o0N0M0mmmm") // ASIMD -//INST(asimd_VQDMLAL, "VQDMLAL", "111100101-----------10-1-0-0----") // ASIMD -INST(asimd_VMULL, "VMULL", "1111001U1Dzznnnndddd11P0N0M0mmmm") // ASIMD -//INST(asimd_VQDMULL, "VQDMULL", "111100101-----------1101-0-0----") // ASIMD - -// Two registers and a scalar -INST(asimd_VMLA_scalar, "VMLA (scalar)", "1111001Q1Dzznnnndddd0o0FN1M0mmmm") // ASIMD -INST(asimd_VMLAL_scalar, "VMLAL (scalar)", "1111001U1dzznnnndddd0o10N1M0mmmm") // ASIMD -//INST(asimd_VQDMLAL_scalar, "VQDMLAL/VQDMLSL (scalar)", "111100101-BB--------0x11-1-0----") // ASIMD -INST(asimd_VMUL_scalar, "VMUL (scalar)", "1111001Q1Dzznnnndddd100FN1M0mmmm") // ASIMD -INST(asimd_VMULL_scalar, "VMULL (scalar)", "1111001U1Dzznnnndddd1010N1M0mmmm") // ASIMD -INST(asimd_VQDMULL_scalar, "VQDMULL (scalar)", "111100101Dzznnnndddd1011N1M0mmmm") // ASIMD -INST(asimd_VQDMULH_scalar, "VQDMULH (scalar)", "1111001Q1Dzznnnndddd1100N1M0mmmm") // ASIMD -INST(asimd_VQRDMULH_scalar, "VQRDMULH (scalar)", "1111001Q1Dzznnnndddd1101N1M0mmmm") // ASIMD - -// Two registers and a shift amount -INST(asimd_SHR, "SHR", "1111001U1Diiiiiidddd0000LQM1mmmm") // ASIMD -INST(asimd_SRA, "SRA", "1111001U1Diiiiiidddd0001LQM1mmmm") // ASIMD -INST(asimd_VRSHR, "VRSHR", "1111001U1Diiiiiidddd0010LQM1mmmm") // ASIMD -INST(asimd_VRSRA, "VRSRA", "1111001U1Diiiiiidddd0011LQM1mmmm") // ASIMD -INST(asimd_VSRI, "VSRI", "111100111Diiiiiidddd0100LQM1mmmm") // ASIMD -INST(asimd_VSHL, "VSHL", "111100101Diiiiiidddd0101LQM1mmmm") // ASIMD -INST(asimd_VSLI, "VSLI", "111100111Diiiiiidddd0101LQM1mmmm") // ASIMD -INST(asimd_VQSHL, "VQSHL" , "1111001U1Diiiiiidddd011oLQM1mmmm") // ASIMD -INST(asimd_VSHRN, "VSHRN", "111100101Diiiiiidddd100000M1mmmm") // ASIMD -INST(asimd_VRSHRN, "VRSHRN", "111100101Diiiiiidddd100001M1mmmm") // ASIMD -INST(asimd_VQSHRUN, "VQSHRUN", "111100111Diiiiiidddd100000M1mmmm") // ASIMD -INST(asimd_VQRSHRUN, "VQRSHRUN", "111100111Diiiiiidddd100001M1mmmm") // ASIMD -INST(asimd_VQSHRN, "VQSHRN", "1111001U1Diiiiiidddd100100M1mmmm") // ASIMD -INST(asimd_VQRSHRN, "VQRSHRN", "1111001U1Diiiiiidddd100101M1mmmm") // ASIMD -INST(asimd_VSHLL, "VSHLL", "1111001U1Diiiiiidddd101000M1mmmm") // ASIMD -INST(asimd_VCVT_fixed, "VCVT (fixed-point)", "1111001U1Diiiiiidddd111o0QM1mmmm") // ASIMD - -// Two registers, miscellaneous -INST(asimd_VREV, "VREV{16,32,64}", "111100111D11zz00dddd000ooQM0mmmm") // ASIMD -INST(asimd_VPADDL, "VPADDL", "111100111D11zz00dddd0010oQM0mmmm") // ASIMD -INST(asimd_VCLS, "VCLS", "111100111D11zz00dddd01000QM0mmmm") // ASIMD -INST(asimd_VCLZ, "VCLZ", "111100111D11zz00dddd01001QM0mmmm") // ASIMD -INST(asimd_VCNT, "VCNT", "111100111D11zz00dddd01010QM0mmmm") // ASIMD -INST(asimd_VMVN_reg, "VMVN_reg", "111100111D11zz00dddd01011QM0mmmm") // ASIMD -INST(asimd_VPADAL, "VPADAL", "111100111D11zz00dddd0110oQM0mmmm") // ASIMD -INST(asimd_VQABS, "VQABS", "111100111D11zz00dddd01110QM0mmmm") // ASIMD -INST(asimd_VQNEG, "VQNEG", "111100111D11zz00dddd01111QM0mmmm") // ASIMD -INST(asimd_VCGT_zero, "VCGT (zero)", "111100111D11zz01dddd0F000QM0mmmm") // ASIMD -INST(asimd_VCGE_zero, "VCGE (zero)", "111100111D11zz01dddd0F001QM0mmmm") // ASIMD -INST(asimd_VCEQ_zero, "VCEQ (zero)", "111100111D11zz01dddd0F010QM0mmmm") // ASIMD -INST(asimd_VCLE_zero, "VCLE (zero)", "111100111D11zz01dddd0F011QM0mmmm") // ASIMD -INST(asimd_VCLT_zero, "VCLT (zero)", "111100111D11zz01dddd0F100QM0mmmm") // ASIMD -INST(arm_UDF, "UNALLOCATED", "111100111-11--01----01101--0----") // v8 -INST(asimd_VABS, "VABS", "111100111D11zz01dddd0F110QM0mmmm") // ASIMD -INST(asimd_VNEG, "VNEG", "111100111D11zz01dddd0F111QM0mmmm") // ASIMD -INST(asimd_VSWP, "VSWP", "111100111D110010dddd00000QM0mmmm") // ASIMD -INST(arm_UDF, "UNALLOCATED", "111100111-11--10----00000--0----") // ASIMD -INST(asimd_VTRN, "VTRN", "111100111D11zz10dddd00001QM0mmmm") // ASIMD -INST(asimd_VUZP, "VUZP", "111100111D11zz10dddd00010QM0mmmm") // ASIMD -INST(asimd_VZIP, "VZIP", "111100111D11zz10dddd00011QM0mmmm") // ASIMD -INST(asimd_VMOVN, "VMOVN", "111100111D11zz10dddd001000M0mmmm") // ASIMD -INST(asimd_VQMOVUN, "VQMOVUN", "111100111D11zz10dddd001001M0mmmm") // ASIMD -INST(asimd_VQMOVN, "VQMOVN", "111100111D11zz10dddd00101oM0mmmm") // ASIMD -INST(asimd_VSHLL_max, "VSHLL_max", "111100111D11zz10dddd001100M0mmmm") // ASIMD -INST(v8_VRINTN, "VRINTN", "111100111D11zz10dddd01000QM0mmmm") // v8 -INST(v8_VRINTX, "VRINTX", "111100111D11zz10dddd01001QM0mmmm") // v8 -INST(v8_VRINTA, "VRINTA", "111100111D11zz10dddd01010QM0mmmm") // v8 -INST(v8_VRINTZ, "VRINTZ", "111100111D11zz10dddd01011QM0mmmm") // v8 -INST(v8_VRINTM, "VRINTM", "111100111D11zz10dddd01101QM0mmmm") // v8 -INST(v8_VRINTP, "VRINTP", "111100111D11zz10dddd01111QM0mmmm") // v8 -INST(asimd_VCVT_half, "VCVT (half-precision)", "111100111D11zz10dddd011o00M0mmmm") // ASIMD -INST(arm_UDF, "UNALLOCATED", "111100111-11--10----011-01-0----") // ASIMD -INST(v8_VCVTA, "VCVTA", "111100111D11zz11dddd0000oQM0mmmm") // v8 -INST(v8_VCVTN, "VCVTN", "111100111D11zz11dddd0001oQM0mmmm") // v8 -INST(v8_VCVTP, "VCVTP", "111100111D11zz11dddd0010oQM0mmmm") // v8 -INST(v8_VCVTM, "VCVTM", "111100111D11zz11dddd0011oQM0mmmm") // v8 -INST(asimd_VRECPE, "VRECPE", "111100111D11zz11dddd010F0QM0mmmm") // ASIMD -INST(asimd_VRSQRTE, "VRSQRTE", "111100111D11zz11dddd010F1QM0mmmm") // ASIMD -INST(asimd_VCVT_integer, "VCVT (integer)", "111100111D11zz11dddd011oUQM0mmmm") // ASIMD - -// Two registers, cryptography -INST(v8_AESE, "AESE", "111100111D11zz00dddd001100M0mmmm") // v8 -INST(v8_AESD, "AESD", "111100111D11zz00dddd001101M0mmmm") // v8 -INST(v8_AESMC, "AESMC", "111100111D11zz00dddd001110M0mmmm") // v8 -INST(v8_AESIMC, "AESIMC", "111100111D11zz00dddd001111M0mmmm") // v8 -INST(arm_UDF, "UNALLOCATED", "111100111-11--01----001010-0----") // v8 -INST(arm_UDF, "UNALLOCATED (SHA1H)", "111100111-11--01----001011-0----") // v8 -INST(arm_UDF, "UNALLOCATED (SHA1SU1)", "111100111-11--10----001110-0----") // v8 -INST(v8_SHA256SU0, "SHA256SU0", "111100111D11zz10dddd001111M0mmmm") // v8 - -// One register and modified immediate -INST(asimd_VMOV_imm, "VBIC, VMOV, VMVN, VORR (immediate)", "1111001a1D000bcdVVVVmmmm0Qo1efgh") // ASIMD - -// Miscellaneous -INST(asimd_VEXT, "VEXT", "111100101D11nnnnddddiiiiNQM0mmmm") // ASIMD -INST(asimd_VTBL, "VTBL", "111100111D11nnnndddd10zzN0M0mmmm") // ASIMD -INST(asimd_VTBX, "VTBX", "111100111D11nnnndddd10zzN1M0mmmm") // ASIMD -INST(asimd_VDUP_scalar, "VDUP (scalar)", "111100111D11iiiidddd11000QM0mmmm") // ASIMD -INST(arm_UDF, "UNALLOCATED", "111100111-11--------11-----0----") // ASIMD - -// Advanced SIMD load/store structures -INST(v8_VST_multiple, "VST{1-4} (multiple)", "111101000D00nnnnddddxxxxzzaammmm") // v8 -INST(v8_VLD_multiple, "VLD{1-4} (multiple)", "111101000D10nnnnddddxxxxzzaammmm") // v8 -INST(arm_UDF, "UNALLOCATED", "111101000--0--------1011--------") // v8 -INST(arm_UDF, "UNALLOCATED", "111101000--0--------11----------") // v8 -INST(arm_UDF, "UNALLOCATED", "111101001-00--------11----------") // v8 -INST(v8_VLD_all_lanes, "VLD{1-4} (all lanes)", "111101001D10nnnndddd11nnzzTammmm") // v8 -INST(v8_VST_single, "VST{1-4} (single)", "111101001D00nnnnddddzzNNaaaammmm") // v8 -INST(v8_VLD_single, "VLD{1-4} (single)", "111101001D10nnnnddddzzNNaaaammmm") // v8 +INST(asimd_VMOV_imm, "VBIC, VMOV, VMVN, VORR (immediate)", "1111001a1D000bcdVVVVmmmm0Qo1efgh") +INST(asimd_VEXT, "VEXT", "111100101D11nnnnddddiiiiNQM0mmmm") +INST(asimd_VTBL, "VTBL", "111100111D11nnnndddd10zzN0M0mmmm") +INST(asimd_VTBX, "VTBX", "111100111D11nnnndddd10zzN1M0mmmm") +INST(asimd_VDUP_scalar, "VDUP (scalar)", "111100111D11iiiidddd11000QM0mmmm") +INST(asimd_VSWP, "VSWP", "111100111D110010dddd00000QM0mmmm") +INST(asimd_VMOVN, "VMOVN", "111100111D11zz10dddd001000M0mmmm") +INST(asimd_VQMOVUN, "VQMOVUN", "111100111D11zz10dddd001001M0mmmm") +INST(asimd_VSHLL_max, "VSHLL_max", "111100111D11zz10dddd001100M0mmmm") +INST(v8_AESE, "AESE", "111100111D11zz00dddd001100M0mmmm") +INST(v8_AESD, "AESD", "111100111D11zz00dddd001101M0mmmm") +INST(v8_AESMC, "AESMC", "111100111D11zz00dddd001110M0mmmm") +INST(v8_AESIMC, "AESIMC", "111100111D11zz00dddd001111M0mmmm") +INST(arm_UDF, "UNALLOCATED", "111100111-11--01----001010-0----") +INST(arm_UDF, "UNALLOCATED (SHA1H)", "111100111-11--01----001011-0----") +INST(arm_UDF, "UNALLOCATED (SHA1SU1)", "111100111-11--10----001110-0----") +INST(v8_SHA256SU0, "SHA256SU0", "111100111D11zz10dddd001111M0mmmm") +INST(asimd_VCLS, "VCLS", "111100111D11zz00dddd01000QM0mmmm") +INST(asimd_VCLZ, "VCLZ", "111100111D11zz00dddd01001QM0mmmm") +INST(asimd_VCNT, "VCNT", "111100111D11zz00dddd01010QM0mmmm") +INST(asimd_VMVN_reg, "VMVN_reg", "111100111D11zz00dddd01011QM0mmmm") +INST(asimd_VQABS, "VQABS", "111100111D11zz00dddd01110QM0mmmm") +INST(asimd_VQNEG, "VQNEG", "111100111D11zz00dddd01111QM0mmmm") +INST(arm_UDF, "UNALLOCATED", "111100111-11--01----01101--0----") +INST(arm_UDF, "UNALLOCATED", "111100111-11--10----00000--0----") +INST(asimd_VTRN, "VTRN", "111100111D11zz10dddd00001QM0mmmm") +INST(asimd_VUZP, "VUZP", "111100111D11zz10dddd00010QM0mmmm") +INST(asimd_VZIP, "VZIP", "111100111D11zz10dddd00011QM0mmmm") +INST(asimd_VQMOVN, "VQMOVN", "111100111D11zz10dddd00101oM0mmmm") +INST(v8_VRINTN, "VRINTN", "111100111D11zz10dddd01000QM0mmmm") +INST(v8_VRINTX, "VRINTX", "111100111D11zz10dddd01001QM0mmmm") +INST(v8_VRINTA, "VRINTA", "111100111D11zz10dddd01010QM0mmmm") +INST(v8_VRINTZ, "VRINTZ", "111100111D11zz10dddd01011QM0mmmm") +INST(v8_VRINTM, "VRINTM", "111100111D11zz10dddd01101QM0mmmm") +INST(v8_VRINTP, "VRINTP", "111100111D11zz10dddd01111QM0mmmm") +INST(asimd_VCVT_half, "VCVT (half-precision)", "111100111D11zz10dddd011o00M0mmmm") +INST(arm_UDF, "UNALLOCATED", "111100111-11--10----011-01-0----") +INST(asimd_VPADDL, "VPADDL", "111100111D11zz00dddd0010oQM0mmmm") +INST(asimd_VPADAL, "VPADAL", "111100111D11zz00dddd0110oQM0mmmm") +INST(asimd_VCGT_zero, "VCGT (zero)", "111100111D11zz01dddd0F000QM0mmmm") +INST(asimd_VCGE_zero, "VCGE (zero)", "111100111D11zz01dddd0F001QM0mmmm") +INST(asimd_VCEQ_zero, "VCEQ (zero)", "111100111D11zz01dddd0F010QM0mmmm") +INST(asimd_VCLE_zero, "VCLE (zero)", "111100111D11zz01dddd0F011QM0mmmm") +INST(asimd_VCLT_zero, "VCLT (zero)", "111100111D11zz01dddd0F100QM0mmmm") +INST(asimd_VABS, "VABS", "111100111D11zz01dddd0F110QM0mmmm") +INST(asimd_VNEG, "VNEG", "111100111D11zz01dddd0F111QM0mmmm") +INST(v8_VCVTA, "VCVTA", "111100111D11zz11dddd0000oQM0mmmm") +INST(v8_VCVTN, "VCVTN", "111100111D11zz11dddd0001oQM0mmmm") +INST(v8_VCVTP, "VCVTP", "111100111D11zz11dddd0010oQM0mmmm") +INST(v8_VCVTM, "VCVTM", "111100111D11zz11dddd0011oQM0mmmm") +INST(asimd_VRECPE, "VRECPE", "111100111D11zz11dddd010F0QM0mmmm") +INST(asimd_VRSQRTE, "VRSQRTE", "111100111D11zz11dddd010F1QM0mmmm") +INST(asimd_VREV, "VREV{16,32,64}", "111100111D11zz00dddd000ooQM0mmmm") +INST(asimd_VCVT_integer, "VCVT (integer)", "111100111D11zz11dddd011oUQM0mmmm") +INST(asimd_VAND_reg, "VAND (register)", "111100100D00nnnndddd0001NQM1mmmm") +INST(asimd_VBIC_reg, "VBIC (register)", "111100100D01nnnndddd0001NQM1mmmm") +INST(asimd_VORR_reg, "VORR (register)", "111100100D10nnnndddd0001NQM1mmmm") +INST(asimd_VORN_reg, "VORN (register)", "111100100D11nnnndddd0001NQM1mmmm") +INST(asimd_VEOR_reg, "VEOR (register)", "111100110D00nnnndddd0001NQM1mmmm") +INST(asimd_VBSL, "VBSL", "111100110D01nnnndddd0001NQM1mmmm") +INST(asimd_VBIT, "VBIT", "111100110D10nnnndddd0001NQM1mmmm") +INST(asimd_VBIF, "VBIF", "111100110D11nnnndddd0001NQM1mmmm") +INST(v8_SHA256H, "SHA256H", "111100110D00nnnndddd1100NQM0mmmm") +INST(v8_SHA256H2, "SHA256H2", "111100110D01nnnndddd1100NQM0mmmm") +INST(v8_SHA256SU1, "SHA256SU1", "111100110D10nnnndddd1100NQM0mmmm") +INST(asimd_VSHRN, "VSHRN", "111100101Diiiiiidddd100000M1mmmm") +INST(asimd_VRSHRN, "VRSHRN", "111100101Diiiiiidddd100001M1mmmm") +INST(asimd_VQSHRUN, "VQSHRUN", "111100111Diiiiiidddd100000M1mmmm") +INST(asimd_VQRSHRUN, "VQRSHRUN", "111100111Diiiiiidddd100001M1mmmm") +INST(v8_VMAXNM, "VMAXNM", "111100110D0znnnndddd1111NQM1mmmm") +INST(v8_VMINNM, "VMINNM", "111100110D1znnnndddd1111NQM1mmmm") +INST(asimd_VFMA, "VFMA", "111100100D0znnnndddd1100NQM1mmmm") +INST(asimd_VFMS, "VFMS", "111100100D1znnnndddd1100NQM1mmmm") +INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") +INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") +INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") +INST(asimd_VABD_float, "VABD (floating-point)", "111100110D1znnnndddd1101NQM0mmmm") +INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100D0znnnndddd1101NQM1mmmm") +INST(asimd_VMLS_float, "VMLS (floating-point)", "111100100D1znnnndddd1101NQM1mmmm") +INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") +INST(asimd_VCEQ_reg_float, "VCEQ (register)", "111100100D0znnnndddd1110NQM0mmmm") +INST(asimd_VCGE_reg_float, "VCGE (register)", "111100110D0znnnndddd1110NQM0mmmm") +INST(asimd_VCGT_reg_float, "VCGT (register)", "111100110D1znnnndddd1110NQM0mmmm") +INST(asimd_VMAX_float, "VMAX (floating-point)", "111100100D0znnnndddd1111NQM0mmmm") +INST(asimd_VMIN_float, "VMIN (floating-point)", "111100100D1znnnndddd1111NQM0mmmm") +INST(asimd_VPMAX_float, "VPMAX (floating-point)", "111100110D0znnnndddd1111NQM0mmmm") +INST(asimd_VPMIN_float, "VPMIN (floating-point)", "111100110D1znnnndddd1111NQM0mmmm") +INST(asimd_VRECPS, "VRECPS", "111100100D0znnnndddd1111NQM1mmmm") +INST(asimd_VRSQRTS, "VRSQRTS", "111100100D1znnnndddd1111NQM1mmmm") +INST(asimd_VQSHRN, "VQSHRN", "1111001U1Diiiiiidddd100100M1mmmm") +INST(asimd_VQRSHRN, "VQRSHRN", "1111001U1Diiiiiidddd100101M1mmmm") +INST(asimd_VSHLL, "VSHLL", "1111001U1Diiiiiidddd101000M1mmmm") +INST(asimd_VADD_int, "VADD (integer)", "111100100Dzznnnndddd1000NQM0mmmm") +INST(asimd_VSUB_int, "VSUB (integer)", "111100110Dzznnnndddd1000NQM0mmmm") +INST(asimd_VTST, "VTST", "111100100Dzznnnndddd1000NQM1mmmm") +INST(asimd_VCEQ_reg, "VCEG (register)", "111100110Dzznnnndddd1000NQM1mmmm") +INST(asimd_VQDMULH, "VQDMULH", "111100100Dzznnnndddd1011NQM0mmmm") +INST(asimd_VQRDMULH, "VQRDMULH", "111100110Dzznnnndddd1011NQM0mmmm") +INST(asimd_VPADD, "VPADD", "111100100Dzznnnndddd1011NQM1mmmm") +INST(asimd_VACGE, "VACGE", "111100110Doznnnndddd1110NQM1mmmm") +INST(asimd_VABAL, "VABAL", "1111001U1Dzznnnndddd0101N0M0mmmm") +INST(asimd_VABDL, "VABDL", "1111001U1Dzznnnndddd0111N0M0mmmm") +INST(asimd_VSRI, "VSRI", "111100111Diiiiiidddd0100LQM1mmmm") +INST(asimd_VSHL, "VSHL", "111100101Diiiiiidddd0101LQM1mmmm") +INST(asimd_VSLI, "VSLI", "111100111Diiiiiidddd0101LQM1mmmm") +INST(arm_UDF, "UNALLOCATED", "111100111-11--------11-----0----") +INST(arm_UDF, "UNALLOCATED", "111101000--0--------1011--------") +INST(asimd_VHADD, "VHADD", "1111001U0Dzznnnndddd0000NQM0mmmm") +INST(asimd_VQADD, "VQADD", "1111001U0Dzznnnndddd0000NQM1mmmm") +INST(asimd_VRHADD, "VRHADD", "1111001U0Dzznnnndddd0001NQM0mmmm") +INST(asimd_VHSUB, "VHSUB", "1111001U0Dzznnnndddd0010NQM0mmmm") +INST(asimd_VQSUB, "VQSUB", "1111001U0Dzznnnndddd0010NQM1mmmm") +INST(asimd_VCGT_reg, "VCGT (register)", "1111001U0Dzznnnndddd0011NQM0mmmm") +INST(asimd_VCGE_reg, "VCGE (register)", "1111001U0Dzznnnndddd0011NQM1mmmm") +INST(asimd_VSHL_reg, "VSHL (register)", "1111001U0Dzznnnndddd0100NQM0mmmm") +INST(asimd_VQSHL_reg, "VQSHL (register)", "1111001U0Dzznnnndddd0100NQM1mmmm") +INST(asimd_VRSHL, "VRSHL", "1111001U0Dzznnnndddd0101NQM0mmmm") +INST(asimd_VABD, "VABD", "1111001U0Dzznnnndddd0111NQM0mmmm") +INST(asimd_VABA, "VABA", "1111001U0Dzznnnndddd0111NQM1mmmm") +INST(asimd_VMLA, "VMLA/VMLS", "1111001o0Dzznnnndddd1001NQM0mmmm") +INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd1001NQM1mmmm") +INST(asimd_VADDL, "VADDL/VADDW", "1111001U1Dzznnnndddd000oN0M0mmmm") +INST(asimd_VSUBL, "VSUBL/VSUBW", "1111001U1Dzznnnndddd001oN0M0mmmm") +INST(asimd_VMLAL, "VMLAL/VMLSL", "1111001U1Dzznnnndddd10o0N0M0mmmm") +INST(asimd_VMULL, "VMULL", "1111001U1Dzznnnndddd11P0N0M0mmmm") +INST(asimd_SHR, "SHR", "1111001U1Diiiiiidddd0000LQM1mmmm") +INST(asimd_SRA, "SRA", "1111001U1Diiiiiidddd0001LQM1mmmm") +INST(asimd_VRSHR, "VRSHR", "1111001U1Diiiiiidddd0010LQM1mmmm") +INST(asimd_VRSRA, "VRSRA", "1111001U1Diiiiiidddd0011LQM1mmmm") +INST(asimd_VCVT_fixed, "VCVT (fixed-point)", "1111001U1Diiiiiidddd111o0QM1mmmm") +INST(arm_UDF, "UNALLOCATED", "111101001-00--------11----------") +INST(v8_VLD_all_lanes, "VLD{1-4} (all lanes)", "111101001D10nnnndddd11nnzzTammmm") +INST(asimd_VMAX, "VMAX/VMIN (integer)", "1111001U0Dzznnnnmmmm0110NQMommmm") +INST(asimd_VPMAX_int, "VPMAX/VPMIN (integer)", "1111001U0Dzznnnndddd1010NQMommmm") +INST(asimd_VQSHL, "VQSHL", "1111001U1Diiiiiidddd011oLQM1mmmm") +INST(arm_UDF, "UNALLOCATED", "111101000--0--------11----------") +INST(v8_VST_multiple, "VST{1-4} (multiple)", "111101000D00nnnnddddxxxxzzaammmm") +INST(v8_VLD_multiple, "VLD{1-4} (multiple)", "111101000D10nnnnddddxxxxzzaammmm") +INST(v8_VST_single, "VST{1-4} (single)", "111101001D00nnnnddddzzNNaaaammmm") +INST(v8_VLD_single, "VLD{1-4} (single)", "111101001D10nnnnddddzzNNaaaammmm") +INST(asimd_VMLA_scalar, "VMLA (scalar)", "1111001Q1Dzznnnndddd0o0FN1M0mmmm") +INST(asimd_VMLAL_scalar, "VMLAL (scalar)", "1111001U1dzznnnndddd0o10N1M0mmmm") +INST(asimd_VMUL_scalar, "VMUL (scalar)", "1111001Q1Dzznnnndddd100FN1M0mmmm") +INST(asimd_VMULL_scalar, "VMULL (scalar)", "1111001U1Dzznnnndddd1010N1M0mmmm") +INST(asimd_VQDMULL_scalar, "VQDMULL (scalar)", "111100101Dzznnnndddd1011N1M0mmmm") +INST(asimd_VQDMULH_scalar, "VQDMULH (scalar)", "1111001Q1Dzznnnndddd1100N1M0mmmm") +INST(asimd_VQRDMULH_scalar, "VQRDMULH (scalar)", "1111001Q1Dzznnnndddd1101N1M0mmmm") diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 16b99ba5aa..eae296f59c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -24,8 +24,8 @@ template using Thumb16Matcher = Decoder::Matcher; template -std::optional>> DecodeThumb16(u16 instruction) { - alignas(64) static const std::vector> table = { +static std::optional>> DecodeThumb16(u16 instruction) { + alignas(64) static const auto table = std::array{ #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST @@ -37,7 +37,7 @@ std::optional>> DecodeThumb16(u16 } template -std::optional GetNameThumb16(u32 inst) noexcept { +static std::optional GetNameThumb16(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, #include "./thumb16.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 19418de67c..d82aef73fa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -23,8 +23,8 @@ template using Thumb32Matcher = Decoder::Matcher; template -std::optional>> DecodeThumb32(u32 instruction) { - alignas(64) static const std::vector> table = { +static std::optional>> DecodeThumb32(u32 instruction) { + alignas(64) static const auto table = std::array{ #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST @@ -36,7 +36,7 @@ std::optional>> DecodeThumb32(u32 } template -std::optional GetNameThumb32(u32 inst) noexcept { +static std::optional GetNameThumb32(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./thumb32.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a346304a9a..f1728e452b 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -24,7 +24,7 @@ template using VFPMatcher = Decoder::Matcher; template -std::optional>> DecodeVFP(u32 instruction) { +static std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; alignas(64) static const struct Tables { Table unconditional; @@ -52,7 +52,7 @@ std::optional>> DecodeVFP(u32 instruc } template -std::optional GetNameVFP(u32 inst) noexcept { +static std::optional GetNameVFP(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./vfp.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/conditional_state.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/conditional_state.cpp index 8c55588a28..82d25f1337 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/conditional_state.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/conditional_state.cpp @@ -32,8 +32,7 @@ bool CondCanContinue(const ConditionalState cond_state, const A32::IREmitter& ir } bool IsConditionPassed(TranslatorVisitor& v, IR::Cond cond) { - ASSERT_MSG(v.cond_state != ConditionalState::Break, - "This should never happen. We requested a break but that wasn't honored."); + ASSERT(v.cond_state != ConditionalState::Break && "This should never happen. We requested a break but that wasn't honored."); if (cond == IR::Cond::NV) { // NV conditional is obsolete diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_branch.cpp index d87cfcfe82..63b40b8c31 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_branch.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.h b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.h index a03b2e666b..a8888c355f 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/a32_translate_impl.h @@ -9,8 +9,7 @@ #pragma once #include "dynarmic/common/assert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/a32_ir_emitter.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_load_store_structures.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_load_store_structures.cpp index 68c0d983af..8d0f78396a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_load_store_structures.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_load_store_structures.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -9,7 +9,7 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_misc.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_misc.cpp index 9d73e7d4ae..9aa50c6b8c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_misc.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_misc.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -9,8 +9,7 @@ #include #include "dynarmic/common/assert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_one_reg_modified_immediate.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_one_reg_modified_immediate.cpp index 459cbfea06..c5bdb1b551 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_one_reg_modified_immediate.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_one_reg_modified_immediate.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -7,7 +7,7 @@ */ #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp index da8f43f2fb..8d5e694bca 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2020 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" #include "dynarmic/frontend/A32/translate/impl/common.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp index ddae1f420b..45455aa444 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_misc.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2020 MerryMage * SPDX-License-Identifier: 0BSD @@ -5,7 +8,7 @@ #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" #include "dynarmic/frontend/A32/translate/impl/common.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp index 4d6855f1ed..d9cc3b1e64 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_scalar.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -9,7 +9,7 @@ #include #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp index 27e94628a8..e5a4eb537f 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -7,8 +7,7 @@ */ #include "dynarmic/common/assert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp index 49c5d65b3a..d7c667aecf 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp @@ -6,8 +6,7 @@ * SPDX-License-Identifier: 0BSD */ -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/misc.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/misc.cpp index ef54b66827..bb33c3c47c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/misc.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/misc.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/status_register_access.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/status_register_access.cpp index 8dfc64d6d7..7a0640598c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/status_register_access.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/status_register_access.cpp @@ -6,7 +6,7 @@ * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp index 926c499222..a8c75e22b9 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb16.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -6,7 +6,7 @@ * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" #include "dynarmic/interface/A32/config.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp index 090e98d1aa..7ea31d40ee 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -7,7 +7,7 @@ */ #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" #include "dynarmic/frontend/A32/translate/impl/common.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_load_store_dual.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_load_store_dual.cpp index eb574d773c..e76666ecc2 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_load_store_dual.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_load_store_dual.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2021 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" #include "dynarmic/frontend/A32/translate/impl/common.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_load_store_multiple.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_load_store_multiple.cpp index d446fbf3dd..b68a2cb7c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_load_store_multiple.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/thumb32_load_store_multiple.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2021 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/translate/impl/a32_translate_impl.h" #include "dynarmic/frontend/A32/translate/impl/common.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index e0333e487d..309dd080f9 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -9,8 +9,7 @@ #include #include "dynarmic/common/assert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A32/a32_ir_emitter.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/a64_location_descriptor.h b/src/dynarmic/src/dynarmic/frontend/A64/a64_location_descriptor.h index 4bfc5f890e..a8be0232ca 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/a64_location_descriptor.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/a64_location_descriptor.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -13,7 +13,7 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/fp/fpcr.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index 68422ed40d..533a93f3aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -15,7 +15,7 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/frontend/decoder/decoder_detail.h" @@ -36,33 +36,19 @@ inline size_t ToFastLookupIndex(u32 instruction) { } // namespace detail template -constexpr DecodeTable GetDecodeTable() { - std::vector>> list = { -#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, -#include "./a64.inc" -#undef INST - }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); - }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& e) { - return std::set{ - "MOVI, MVNI, ORR, BIC (vector, immediate)", - "FMOV (vector, immediate)", - "Unallocated SIMD modified immediate", - }.count(e.first) > 0; - }); +inline DecodeTable GetDecodeTable() { DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto const& e : list) { - const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); - const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); - if ((i & mask) == expect) { - table[i].push_back(e.second); - } + // PLEASE HEAP ELLIDE + for (auto const& e : std::vector>{ +#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), +#include "./a64.inc" +#undef INST + }) { + const auto expect = detail::ToFastLookupIndex(e.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.GetMask()); + if ((i & mask) == expect) + table[i].push_back(e); } } return table; @@ -70,18 +56,19 @@ constexpr DecodeTable GetDecodeTable() { /// In practice it must always suceed, otherwise something else unrelated would have gone awry template -std::reference_wrapper> Decode(u32 instruction) { +inline std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { return matcher.Matches(instruction); }); - DEBUG_ASSERT(iter != subtable.end()); - return std::reference_wrapper>(*iter); + return iter != subtable.end() + ? std::optional{ std::reference_wrapper>(*iter) } + : std::nullopt; } template -std::optional GetName(u32 inst) noexcept { +inline std::optional GetName(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.inc b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.inc index 23f8b71933..8a3fab7ac1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.inc +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.inc @@ -1,1029 +1,647 @@ -// Data processing - Immediate - PC relative addressing -INST(ADR, "ADR", "0ii10000iiiiiiiiiiiiiiiiiiiddddd") -INST(ADRP, "ADRP", "1ii10000iiiiiiiiiiiiiiiiiiiddddd") - -// Data processing - Immediate - Add/Sub (with tags) -//INST(ADDG, "ADDG", "1001000110iiiiii00IIIInnnnnddddd") // ARMv8.5 -//INST(SUBG, "SUBG", "1101000110iiiiii00IIIInnnnnddddd") // ARMv8.5 - -// Data processing - Immediate - Add/Sub -INST(ADD_imm, "ADD (immediate)", "z0010001ssiiiiiiiiiiiinnnnnddddd") -INST(ADDS_imm, "ADDS (immediate)", "z0110001ssiiiiiiiiiiiinnnnnddddd") -INST(SUB_imm, "SUB (immediate)", "z1010001ssiiiiiiiiiiiinnnnnddddd") -INST(SUBS_imm, "SUBS (immediate)", "z1110001ssiiiiiiiiiiiinnnnnddddd") - -// Data processing - Immediate - Logical -INST(AND_imm, "AND (immediate)", "z00100100Nrrrrrrssssssnnnnnddddd") -INST(ORR_imm, "ORR (immediate)", "z01100100Nrrrrrrssssssnnnnnddddd") -INST(EOR_imm, "EOR (immediate)", "z10100100Nrrrrrrssssssnnnnnddddd") -INST(ANDS_imm, "ANDS (immediate)", "z11100100Nrrrrrrssssssnnnnnddddd") - -// Data processing - Immediate - Move Wide -INST(MOVN, "MOVN", "z00100101ssiiiiiiiiiiiiiiiiddddd") -INST(MOVZ, "MOVZ", "z10100101ssiiiiiiiiiiiiiiiiddddd") -INST(MOVK, "MOVK", "z11100101ssiiiiiiiiiiiiiiiiddddd") - -// Data processing - Immediate - Bitfield -INST(SBFM, "SBFM", "z00100110Nrrrrrrssssssnnnnnddddd") -INST(BFM, "BFM", "z01100110Nrrrrrrssssssnnnnnddddd") -INST(UBFM, "UBFM", "z10100110Nrrrrrrssssssnnnnnddddd") -INST(ASR_1, "ASR (immediate, 32-bit)", "00010011000rrrrr011111nnnnnddddd") -INST(ASR_2, "ASR (immediate, 64-bit)", "1001001101rrrrrr111111nnnnnddddd") -INST(SXTB_1, "SXTB (32-bit)", "0001001100000000000111nnnnnddddd") -INST(SXTB_2, "SXTB (64-bit)", "1001001101000000000111nnnnnddddd") -INST(SXTH_1, "SXTH (32-bit)", "0001001100000000001111nnnnnddddd") -INST(SXTH_2, "SXTH (64-bit)", "1001001101000000001111nnnnnddddd") -INST(SXTW, "SXTW", "1001001101000000011111nnnnnddddd") - -// Data processing - Immediate - Extract -INST(EXTR, "EXTR", "z00100111N0mmmmmssssssnnnnnddddd") - -// Conditional branch -INST(B_cond, "B.cond", "01010100iiiiiiiiiiiiiiiiiii0cccc") - -// Exception generation -INST(SVC, "SVC", "11010100000iiiiiiiiiiiiiiii00001") -//INST(HVC, "HVC", "11010100000iiiiiiiiiiiiiiii00010") -//INST(SMC, "SMC", "11010100000iiiiiiiiiiiiiiii00011") -INST(BRK, "BRK", "11010100001iiiiiiiiiiiiiiii00000") -//INST(HLT, "HLT", "11010100010iiiiiiiiiiiiiiii00000") -//INST(DCPS1, "DCPS1", "11010100101iiiiiiiiiiiiiiii00001") -//INST(DCPS2, "DCPS2", "11010100101iiiiiiiiiiiiiiii00010") -//INST(DCPS3, "DCPS3", "11010100101iiiiiiiiiiiiiiii00011") - -// System -//INST(MSR_imm, "MSR (immediate)", "1101010100000ooo0100MMMMooo11111") -INST(HINT, "HINT", "11010101000000110010MMMMooo11111") -INST(NOP, "NOP", "11010101000000110010000000011111") -INST(YIELD, "YIELD", "11010101000000110010000000111111") -INST(WFE, "WFE", "11010101000000110010000001011111") -INST(WFI, "WFI", "11010101000000110010000001111111") -INST(SEV, "SEV", "11010101000000110010000010011111") -INST(SEVL, "SEVL", "11010101000000110010000010111111") -//INST(DGH, "DGH", "11010101000000110010000011011111") // v8.6 -//INST(WFET, "WFET", "110101010000001100010000000ddddd") // v8.7 -//INST(WFIT, "WFIT", "110101010000001100010000001ddddd") // v8.7 -//INST(XPAC_1, "XPACD, XPACI, XPACLRI", "110110101100000101000D11111ddddd") -//INST(XPAC_2, "XPACD, XPACI, XPACLRI", "11010101000000110010000011111111") -//INST(PACIA_1, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "110110101100000100Z000nnnnnddddd") -//INST(PACIA_2, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "1101010100000011001000-100-11111") -//INST(PACIB_1, "PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB", "110110101100000100Z001nnnnnddddd") -//INST(PACIB_2, "PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB", "1101010100000011001000-101-11111") -//INST(AUTIA_1, "AUTIA, AUTIA1716, AUTIASP, AUTIAZ, AUTIZA", "110110101100000100Z100nnnnnddddd") -//INST(AUTIA_2, "AUTIA, AUTIA1716, AUTIASP, AUTIAZ, AUTIZA", "1101010100000011001000-110-11111") -//INST(AUTIB_1, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "110110101100000100Z101nnnnnddddd") -//INST(AUTIB_2, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "1101010100000011001000-111-11111") -//INST(BTI, "BTI", "110101010000001100100100ii011111") // ARMv8.5 -//INST(ESB, "ESB", "11010101000000110010001000011111") -//INST(PSB, "PSB CSYNC", "11010101000000110010001000111111") -//INST(TSB, "TSB CSYNC", "11010101000000110010001001011111") // ARMv8.5 -//INST(CSDB, "CSDB", "11010101000000110010001010011111") -INST(CLREX, "CLREX", "11010101000000110011MMMM01011111") -INST(DSB, "DSB", "11010101000000110011MMMM10011111") -//INST(SSBB, "SSBB", "11010101000000110011000010011111") -//INST(PSSBB, "PSSBB", "11010101000000110011010010011111") -INST(DMB, "DMB", "11010101000000110011MMMM10111111") -INST(ISB, "ISB", "11010101000000110011MMMM11011111") -//INST(SB, "SB", "11010101000000110011000011111111") -//INST(SYS, "SYS", "1101010100001oooNNNNMMMMooottttt") -INST(MSR_reg, "MSR (register)", "110101010001poooNNNNMMMMooottttt") -//INST(SYSL, "SYSL", "1101010100101oooNNNNMMMMooottttt") -INST(MRS, "MRS", "110101010011poooNNNNMMMMooottttt") - -// System - Flag manipulation instructions -INST(CFINV, "CFINV", "11010101000000000100000000011111") // ARMv8.4 -INST(RMIF, "RMIF", "10111010000iiiiii00001nnnnn0IIII") // ARMv8.4 -//INST(SETF8, "SETF8", "0011101000000000000010nnnnn01101") // ARMv8.4 -//INST(SETF16, "SETF16", "0011101000000000010010nnnnn01101") // ARMv8.4 - -// System - Flag format instructions -INST(XAFlag, "XAFlag", "11010101000000000100000000111111") // ARMv8.5 -INST(AXFlag, "AXFlag", "11010101000000000100000001011111") // ARMv8.5 - -// SYS: Data Cache -INST(DC_IVAC, "DC IVAC", "110101010000100001110110001ttttt") -INST(DC_ISW, "DC ISW", "110101010000100001110110010ttttt") -INST(DC_CSW, "DC CSW", "110101010000100001111010010ttttt") -INST(DC_CISW, "DC CISW", "110101010000100001111110010ttttt") -INST(DC_ZVA, "DC ZVA", "110101010000101101110100001ttttt") -INST(DC_CVAC, "DC CVAC", "110101010000101101111010001ttttt") -INST(DC_CVAU, "DC CVAU", "110101010000101101111011001ttttt") -INST(DC_CVAP, "DC CVAP", "110101010000101101111100001ttttt") -INST(DC_CIVAC, "DC CIVAC", "110101010000101101111110001ttttt") - -// SYS: Instruction Cache -INST(IC_IALLU, "IC IALLU", "11010101000010000111010100011111") -INST(IC_IALLUIS, "IC IALLUIS", "11010101000010000111000100011111") -INST(IC_IVAU, "IC IVAU", "110101010000101101110101001ttttt") - -// Unconditional branch (Register) -INST(BLR, "BLR", "1101011000111111000000nnnnn00000") -INST(BR, "BR", "1101011000011111000000nnnnn00000") -//INST(DRPS, "DRPS", "11010110101111110000001111100000") -//INST(ERET, "ERET", "11010110100111110000001111100000") -INST(RET, "RET", "1101011001011111000000nnnnn00000") -//INST(BLRA, "BLRAA, BLRAAZ, BLRAB, BLRABZ", "1101011Z0011111100001Mnnnnnmmmmm") // ARMv8.3 -//INST(BRA, "BRAA, BRAAZ, BRAB, BRABZ", "1101011Z0001111100001Mnnnnnmmmmm") // ARMv8.3 -//INST(ERETA, "ERETAA, ERETAB", "110101101001111100001M1111111111") // ARMv8.3 -//INST(RETA, "RETAA, RETAB", "110101100101111100001M1111111111") // ARMv8.3 - -// Unconditional branch (immediate) -INST(B_uncond, "B", "000101iiiiiiiiiiiiiiiiiiiiiiiiii") -INST(BL, "BL", "100101iiiiiiiiiiiiiiiiiiiiiiiiii") - -// Compare and branch (immediate) -INST(CBZ, "CBZ", "z0110100iiiiiiiiiiiiiiiiiiittttt") -INST(CBNZ, "CBNZ", "z0110101iiiiiiiiiiiiiiiiiiittttt") -INST(TBZ, "TBZ", "b0110110bbbbbiiiiiiiiiiiiiittttt") -INST(TBNZ, "TBNZ", "b0110111bbbbbiiiiiiiiiiiiiittttt") - -// Loads and stores - Advanced SIMD Load/Store multiple structures -INST(STx_mult_1, "STx (multiple structures)", "0Q00110000000000oooozznnnnnttttt") -INST(STx_mult_2, "STx (multiple structures)", "0Q001100100mmmmmoooozznnnnnttttt") -INST(LDx_mult_1, "LDx (multiple structures)", "0Q00110001000000oooozznnnnnttttt") -INST(LDx_mult_2, "LDx (multiple structures)", "0Q001100110mmmmmoooozznnnnnttttt") - -// Loads and stores - Advanced SIMD Load/Store single structures -INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000oo0Szznnnnnttttt") -INST(ST1_sngl_2, "ST1 (single structure)", "0Q001101100mmmmmoo0Szznnnnnttttt") -INST(ST3_sngl_1, "ST3 (single structure)", "0Q00110100000000oo1Szznnnnnttttt") -INST(ST3_sngl_2, "ST3 (single structure)", "0Q001101100mmmmmoo1Szznnnnnttttt") -INST(ST2_sngl_1, "ST2 (single structure)", "0Q00110100100000oo0Szznnnnnttttt") -INST(ST2_sngl_2, "ST2 (single structure)", "0Q001101101mmmmmoo0Szznnnnnttttt") -INST(ST4_sngl_1, "ST4 (single structure)", "0Q00110100100000oo1Szznnnnnttttt") -INST(ST4_sngl_2, "ST4 (single structure)", "0Q001101101mmmmmoo1Szznnnnnttttt") -INST(LD1_sngl_1, "LD1 (single structure)", "0Q00110101000000oo0Szznnnnnttttt") -INST(LD1_sngl_2, "LD1 (single structure)", "0Q001101110mmmmmoo0Szznnnnnttttt") -INST(LD3_sngl_1, "LD3 (single structure)", "0Q00110101000000oo1Szznnnnnttttt") -INST(LD3_sngl_2, "LD3 (single structure)", "0Q001101110mmmmmoo1Szznnnnnttttt") -INST(LD1R_1, "LD1R", "0Q001101010000001100zznnnnnttttt") -INST(LD1R_2, "LD1R", "0Q001101110mmmmm1100zznnnnnttttt") -INST(LD3R_1, "LD3R", "0Q001101010000001110zznnnnnttttt") -INST(LD3R_2, "LD3R", "0Q001101110mmmmm1110zznnnnnttttt") -INST(LD2_sngl_1, "LD2 (single structure)", "0Q00110101100000oo0Szznnnnnttttt") -INST(LD2_sngl_2, "LD2 (single structure)", "0Q001101111mmmmmoo0Szznnnnnttttt") -INST(LD4_sngl_1, "LD4 (single structure)", "0Q00110101100000oo1Szznnnnnttttt") -INST(LD4_sngl_2, "LD4 (single structure)", "0Q001101111mmmmmoo1Szznnnnnttttt") -INST(LD2R_1, "LD2R", "0Q001101011000001100zznnnnnttttt") -INST(LD2R_2, "LD2R", "0Q001101111mmmmm1100zznnnnnttttt") -INST(LD4R_1, "LD4R", "0Q001101011000001110zznnnnnttttt") -INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt") - -// Loads and stores - Load/Store Exclusive -INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt") -INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt") -INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt") -INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt") -INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt") -INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt") -INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt") -INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt") -INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt") -INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt") -INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt") -INST(LDAR, "LDARB, LDARH, LDAR", "zz00100011011111111111nnnnnttttt") -//INST(CASP, "CASP, CASPA, CASPAL, CASPL", "0z0010000L1sssssp11111nnnnnttttt") // ARMv8.1 -//INST(CASB, "CASB, CASAB, CASALB, CASLB", "000010001L1sssssp11111nnnnnttttt") // ARMv8.1 -//INST(CASH, "CASH, CASAH, CASALH, CASLH", "010010001L1sssssp11111nnnnnttttt") // ARMv8.1 -//INST(CAS, "CAS, CASA, CASAL, CASL", "1z0010001L1sssssp11111nnnnnttttt") // ARMv8.1 - -// Loads and stores - Load register (literal) -INST(LDR_lit_gen, "LDR (literal)", "0z011000iiiiiiiiiiiiiiiiiiittttt") -INST(LDRSW_lit, "LDRSW (literal)", "10011000iiiiiiiiiiiiiiiiiiittttt") -INST(PRFM_lit, "PRFM (literal)", "11011000iiiiiiiiiiiiiiiiiiittttt") -INST(LDR_lit_fpsimd, "LDR (literal, SIMD&FP)", "oo011100iiiiiiiiiiiiiiiiiiittttt") - -// Loads and stores - Load/Store no-allocate pair -INST(STNP_LDNP_gen, "STNP/LDNP", "o01010000Liiiiiiiuuuuunnnnnttttt") -INST(STNP_LDNP_fpsimd, "STNP/LDNP (SIMD&FP)", "oo1011000Liiiiiiiuuuuunnnnnttttt") - -// Loads and stores - Load/Store register pair -INST(STP_LDP_gen, "STP/LDP", "oo10100pwLiiiiiiiuuuuunnnnnttttt") -INST(UnallocatedEncoding, "", "--1010000-----------------------") -INST(STP_LDP_fpsimd, "STP/LDP (SIMD&FP)", "oo10110pwLiiiiiiiuuuuunnnnnttttt") -INST(UnallocatedEncoding, "", "--1011000-----------------------") - -// Loads and stores - Load/Store register (unscaled immediate) -INST(STURx_LDURx, "STURx/LDURx", "zz111000oo0iiiiiiiii00nnnnnttttt") -INST(UnallocatedEncoding, "", "111110001-0---------00----------") -INST(UnallocatedEncoding, "", "10111000110---------00----------") -INST(PRFM_imm, "PRFM (immediate)", "1111100110iiiiiiiiiiiinnnnnttttt") -INST(PRFM_unscaled_imm, "PRFM (unscaled offset)", "11111000100iiiiiiiii00nnnnnttttt") -INST(STUR_fpsimd, "STUR (SIMD&FP)", "zz111100o00iiiiiiiii00nnnnnttttt") -INST(LDUR_fpsimd, "LDUR (SIMD&FP)", "zz111100o10iiiiiiiii00nnnnnttttt") - -// Loads and stores - Load/Store register (immediate pre/post-indexed) -INST(STRx_LDRx_imm_1, "STRx/LDRx (immediate)", "zz111000oo0iiiiiiiiip1nnnnnttttt") -INST(STRx_LDRx_imm_2, "STRx/LDRx (immediate)", "zz111001ooiiiiiiiiiiiinnnnnttttt") -INST(UnallocatedEncoding, "", "111110001-0----------1----------") -INST(UnallocatedEncoding, "", "10111000110----------1----------") -INST(UnallocatedEncoding, "", "1111100111----------------------") -INST(UnallocatedEncoding, "", "1011100111----------------------") -INST(STR_imm_fpsimd_1, "STR (immediate, SIMD&FP)", "zz111100o00iiiiiiiiip1nnnnnttttt") -INST(STR_imm_fpsimd_2, "STR (immediate, SIMD&FP)", "zz111101o0iiiiiiiiiiiinnnnnttttt") -INST(LDR_imm_fpsimd_1, "LDR (immediate, SIMD&FP)", "zz111100o10iiiiiiiiip1nnnnnttttt") -INST(LDR_imm_fpsimd_2, "LDR (immediate, SIMD&FP)", "zz111101o1iiiiiiiiiiiinnnnnttttt") -//INST(STGP_1, "STGP (post-index)", "0110100010iiiiiiimmmmmnnnnnttttt") // ARMv8.5 -//INST(STGP_2, "STGP (pre-index)", "0110100110iiiiiiimmmmmnnnnnttttt") // ARMv8.5 -//INST(STGP_3, "STGP (signed-offset)", "0110100100iiiiiiimmmmmnnnnnttttt") // ARMv8.5 - -// Loads and stores - Load/Store register (unprivileged) -INST(STTRB, "STTRB", "00111000000iiiiiiiii10nnnnnttttt") -INST(LDTRB, "LDTRB", "00111000010iiiiiiiii10nnnnnttttt") -INST(LDTRSB, "LDTRSB", "00111000oo0iiiiiiiii10nnnnnttttt") -INST(STTRH, "STTRH", "01111000000iiiiiiiii10nnnnnttttt") -INST(LDTRH, "LDTRH", "01111000010iiiiiiiii10nnnnnttttt") -INST(LDTRSH, "LDTRSH", "01111000oo0iiiiiiiii10nnnnnttttt") -INST(STTR, "STTR", "zz111000000iiiiiiiii10nnnnnttttt") -INST(LDTR, "LDTR", "zz111000010iiiiiiiii10nnnnnttttt") -INST(LDTRSW, "LDTRSW", "10111000100iiiiiiiii10nnnnnttttt") - -// Loads and stores - Atomic memory options -//INST(LDADDB, "LDADDB, LDADDAB, LDADDALB, LDADDLB", "00111000AR1sssss000000nnnnnttttt") -//INST(LDCLRB, "LDCLRB, LDCLRAB, LDCLRALB, LDCLRLB", "00111000AR1sssss000100nnnnnttttt") -//INST(LDEORB, "LDEORB, LDEORAB, LDEORALB, LDEORLB", "00111000AR1sssss001000nnnnnttttt") -//INST(LDSETB, "LDSETB, LDSETAB, LDSETALB, LDSETLB", "00111000AR1sssss001100nnnnnttttt") -//INST(LDSMAXB, "LDSMAXB, LDSMAXAB, LDSMAXALB, LDSMAXLB", "00111000AR1sssss010000nnnnnttttt") -//INST(LDSMINB, "LDSMINB, LDSMINAB, LDSMINALB, LDSMINLB", "00111000AR1sssss010100nnnnnttttt") -//INST(LDUMAXB, "LDUMAXB, LDUMAXAB, LDUMAXALB, LDUMAXLB", "00111000AR1sssss011000nnnnnttttt") -//INST(LDUMINB, "LDUMINB, LDUMINAB, LDUMINALB, LDUMINLB", "00111000AR1sssss011100nnnnnttttt") -//INST(SWPB, "SWPB, SWPAB, SWPALB, SWPLB", "00111000AR1sssss100000nnnnnttttt") -//INST(LDAPRB, "LDAPRB", "0011100010111111110000nnnnnttttt") -//INST(LDADDH, "LDADDH, LDADDAH, LDADDALH, LDADDLH", "01111000AR1sssss000000nnnnnttttt") -//INST(LDCLRH, "LDCLRH, LDCLRAH, LDCLRALH, LDCLRLH", "01111000AR1sssss000100nnnnnttttt") -//INST(LDEORH, "LDEORH, LDEORAH, LDEORALH, LDEORLH", "01111000AR1sssss001000nnnnnttttt") -//INST(LDSETH, "LDSETH, LDSETAH, LDSETALH, LDSETLH", "01111000AR1sssss001100nnnnnttttt") -//INST(LDSMAXH, "LDSMAXH, LDSMAXAH, LDSMAXALH, LDSMAXLH", "01111000AR1sssss010000nnnnnttttt") -//INST(LDSMINH, "LDSMINH, LDSMINAH, LDSMINALH, LDSMINLH", "01111000AR1sssss010100nnnnnttttt") -//INST(LDUMAXH, "LDUMAXH, LDUMAXAH, LDUMAXALH, LDUMAXLH", "01111000AR1sssss011000nnnnnttttt") -//INST(LDUMINH, "LDUMINH, LDUMINAH, LDUMINALH, LDUMINLH", "01111000AR1sssss011100nnnnnttttt") -//INST(SWPH, "SWPH, SWPAH, SWPALH, SWPLH", "01111000AR1sssss100000nnnnnttttt") -//INST(LDAPRH, "LDAPRH", "0111100010111111110000nnnnnttttt") -//INST(LDADD, "LDADD, LDADDA, LDADDAL, LDADDL", "1-111000AR1sssss000000nnnnnttttt") -//INST(LDCLR, "LDCLR, LDCLRA, LDCLRAL, LDCLRL", "1-111000AR1sssss000100nnnnnttttt") -//INST(LDEOR, "LDEOR, LDEORA, LDEORAL, LDEORL", "1-111000AR1sssss001000nnnnnttttt") -//INST(LDSET, "LDSET, LDSETA, LDSETAL, LDSETL", "1-111000AR1sssss001100nnnnnttttt") -//INST(LDSMAX, "LDSMAX, LDSMAXA, LDSMAXAL, LDSMAXL", "1-111000AR1sssss010000nnnnnttttt") -//INST(LDSMIN, "LDSMIN, LDSMINA, LDSMINAL, LDSMINL", "1-111000AR1sssss010100nnnnnttttt") -//INST(LDUMAX, "LDUMAX, LDUMAXA, LDUMAXAL, LDUMAXL", "1-111000AR1sssss011000nnnnnttttt") -//INST(LDUMIN, "LDUMIN, LDUMINA, LDUMINAL, LDUMINL", "1-111000AR1sssss011100nnnnnttttt") -//INST(SWP, "SWP, SWPA, SWPAL, SWPL", "1-111000AR1sssss100000nnnnnttttt") -//INST(LDAPR, "LDAPR", "1-11100010111111110000nnnnnttttt") -//INST(LD64B, "LD64B", "1111100000111111110100nnnnnttttt") // v8.7 -//INST(ST64B, "ST64B", "1111100000111111100100nnnnnttttt") // v8.7 -//INST(ST64BV, "ST64BV", "11111000001sssss101100nnnnnttttt") // v8.7 -//INST(ST64BV0, "ST64BV0", "11111000001sssss101000nnnnnttttt") // v8.7 - -// Loads and stores - Load/Store register (register offset) -INST(STRx_reg, "STRx (register)", "zz111000o01mmmmmxxxS10nnnnnttttt") -INST(LDRx_reg, "LDRx (register)", "zz111000o11mmmmmxxxS10nnnnnttttt") -INST(STR_reg_fpsimd, "STR (register, SIMD&FP)", "zz111100o01mmmmmxxxS10nnnnnttttt") -INST(LDR_reg_fpsimd, "LDR (register, SIMD&FP)", "zz111100o11mmmmmxxxS10nnnnnttttt") - -// Loads and stores - Load/Store memory tags -//INST(STG_1, "STG (post-index)", "11011001001iiiiiiiii01nnnnn11111") // ARMv8.5 -//INST(STG_2, "STG (pre-index)", "11011001001iiiiiiiii11nnnnn11111") // ARMv8.5 -//INST(STG_3, "STG (signed-offset)", "11011001001iiiiiiiii10nnnnn11111") // ARMv8.5 -//INST(LDG, "LDG", "11011001011iiiiiiiii00nnnnnttttt") // ARMv8.5 -//INST(STZG_1, "STZG (post-index)", "11011001011iiiiiiiii01nnnnn11111") // ARMv8.5 -//INST(STZG_2, "STZG (pre-index)", "11011001011iiiiiiiii11nnnnn11111") // ARMv8.5 -//INST(STZG_3, "STZG (signed-offset)", "11011001011iiiiiiiii10nnnnn11111") // ARMv8.5 -//INST(ST2G_1, "ST2G (post-index)", "11011001101iiiiiiiii01nnnnn11111") // ARMv8.5 -//INST(ST2G_2, "ST2G (pre-index)", "11011001101iiiiiiiii11nnnnn11111") // ARMv8.5 -//INST(ST2G_3, "ST2G (signed-offset)", "11011001101iiiiiiiii10nnnnn11111") // ARMv8.5 -//INST(STGV, "STGV", "1101100110100000000000nnnnnttttt") // ARMv8.5 -//INST(STZ2G_1, "STZ2G (post-index)", "11011001111iiiiiiiii01nnnnn11111") // ARMv8.5 -//INST(STZ2G_2, "STZ2G (pre-index)", "11011001111iiiiiiiii11nnnnn11111") // ARMv8.5 -//INST(STZ2G_3, "STZ2G (signed-offset)", "11011001111iiiiiiiii10nnnnn11111") // ARMv8.5 -//INST(LDGV, "LDGV", "1101100111100000000000nnnnnttttt") // ARMv8.5 - -// Loads and stores - Load/Store register (pointer authentication) -//INST(LDRA, "LDRAA, LDRAB", "11111000MS1iiiiiiiiiW1nnnnnttttt") - -// Data Processing - Register - 2 source -INST(UDIV, "UDIV", "z0011010110mmmmm000010nnnnnddddd") -INST(SDIV, "SDIV", "z0011010110mmmmm000011nnnnnddddd") -INST(LSLV, "LSLV", "z0011010110mmmmm001000nnnnnddddd") -INST(LSRV, "LSRV", "z0011010110mmmmm001001nnnnnddddd") -INST(ASRV, "ASRV", "z0011010110mmmmm001010nnnnnddddd") -INST(RORV, "RORV", "z0011010110mmmmm001011nnnnnddddd") -INST(CRC32, "CRC32B, CRC32H, CRC32W, CRC32X", "z0011010110mmmmm0100zznnnnnddddd") -INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd") -//INST(PACGA, "PACGA", "10011010110mmmmm001100nnnnnddddd") -//INST(SUBP, "SUBP", "10011010110mmmmm000000nnnnnddddd") // ARMv8.5 -//INST(IRG, "IRG", "10011010110mmmmm000100nnnnnddddd") // ARMv8.5 -//INST(GMI, "GMI", "10011010110mmmmm000101nnnnnddddd") // ARMv8.5 -//INST(SUBPS, "SUBPS", "10111010110mmmmm000000nnnnnddddd") // ARMv8.5 - -// Data Processing - Register - 1 source -INST(RBIT_int, "RBIT", "z101101011000000000000nnnnnddddd") -INST(REV16_int, "REV16", "z101101011000000000001nnnnnddddd") -INST(REV, "REV", "z10110101100000000001onnnnnddddd") -INST(CLZ_int, "CLZ", "z101101011000000000100nnnnnddddd") -INST(CLS_int, "CLS", "z101101011000000000101nnnnnddddd") -INST(REV32_int, "REV32", "1101101011000000000010nnnnnddddd") -//INST(PACDA, "PACDA, PACDZA", "110110101100000100Z010nnnnnddddd") -//INST(PACDB, "PACDB, PACDZB", "110110101100000100Z011nnnnnddddd") -//INST(AUTDA, "AUTDA, AUTDZA", "110110101100000100Z110nnnnnddddd") -//INST(AUTDB, "AUTDB, AUTDZB", "110110101100000100Z111nnnnnddddd") - -// Data Processing - Register - Logical (shifted register) -INST(AND_shift, "AND (shifted register)", "z0001010ss0mmmmmiiiiiinnnnnddddd") -INST(BIC_shift, "BIC (shifted register)", "z0001010ss1mmmmmiiiiiinnnnnddddd") -INST(ORR_shift, "ORR (shifted register)", "z0101010ss0mmmmmiiiiiinnnnnddddd") -INST(ORN_shift, "ORN (shifted register)", "z0101010ss1mmmmmiiiiiinnnnnddddd") -INST(EOR_shift, "EOR (shifted register)", "z1001010ss0mmmmmiiiiiinnnnnddddd") -INST(EON, "EON (shifted register)", "z1001010ss1mmmmmiiiiiinnnnnddddd") -INST(ANDS_shift, "ANDS (shifted register)", "z1101010ss0mmmmmiiiiiinnnnnddddd") -INST(BICS, "BICS (shifted register)", "z1101010ss1mmmmmiiiiiinnnnnddddd") - -// Data Processing - Register - Add/Sub (shifted register) -INST(ADD_shift, "ADD (shifted register)", "z0001011ss0mmmmmiiiiiinnnnnddddd") -INST(ADDS_shift, "ADDS (shifted register)", "z0101011ss0mmmmmiiiiiinnnnnddddd") -INST(SUB_shift, "SUB (shifted register)", "z1001011ss0mmmmmiiiiiinnnnnddddd") -INST(SUBS_shift, "SUBS (shifted register)", "z1101011ss0mmmmmiiiiiinnnnnddddd") - -// Data Processing - Register - Add/Sub (shifted register) -INST(ADD_ext, "ADD (extended register)", "z0001011001mmmmmxxxiiinnnnnddddd") -INST(ADDS_ext, "ADDS (extended register)", "z0101011001mmmmmxxxiiinnnnnddddd") -INST(SUB_ext, "SUB (extended register)", "z1001011001mmmmmxxxiiinnnnnddddd") -INST(SUBS_ext, "SUBS (extended register)", "z1101011001mmmmmxxxiiinnnnnddddd") - -// Data Processing - Register - Add/Sub (with carry) -INST(ADC, "ADC", "z0011010000mmmmm000000nnnnnddddd") -INST(ADCS, "ADCS", "z0111010000mmmmm000000nnnnnddddd") -INST(SBC, "SBC", "z1011010000mmmmm000000nnnnnddddd") -INST(SBCS, "SBCS", "z1111010000mmmmm000000nnnnnddddd") - -// Data Processing - Register - Conditional compare -INST(CCMN_reg, "CCMN (register)", "z0111010010mmmmmcccc00nnnnn0ffff") -INST(CCMP_reg, "CCMP (register)", "z1111010010mmmmmcccc00nnnnn0ffff") -INST(CCMN_imm, "CCMN (immediate)", "z0111010010iiiiicccc10nnnnn0ffff") -INST(CCMP_imm, "CCMP (immediate)", "z1111010010iiiiicccc10nnnnn0ffff") - -// Data Processing - Register - Conditional select -INST(CSEL, "CSEL", "z0011010100mmmmmcccc00nnnnnddddd") -INST(CSINC, "CSINC", "z0011010100mmmmmcccc01nnnnnddddd") -INST(CSINV, "CSINV", "z1011010100mmmmmcccc00nnnnnddddd") -INST(CSNEG, "CSNEG", "z1011010100mmmmmcccc01nnnnnddddd") - -// Data Processing - Register - 3 source -INST(MADD, "MADD", "z0011011000mmmmm0aaaaannnnnddddd") -INST(MSUB, "MSUB", "z0011011000mmmmm1aaaaannnnnddddd") -INST(SMADDL, "SMADDL", "10011011001mmmmm0aaaaannnnnddddd") -INST(SMSUBL, "SMSUBL", "10011011001mmmmm1aaaaannnnnddddd") -INST(SMULH, "SMULH", "10011011010mmmmm011111nnnnnddddd") -INST(UMADDL, "UMADDL", "10011011101mmmmm0aaaaannnnnddddd") -INST(UMSUBL, "UMSUBL", "10011011101mmmmm1aaaaannnnnddddd") -INST(UMULH, "UMULH", "10011011110mmmmm011111nnnnnddddd") - -// Data Processing - FP and SIMD - AES -INST(AESE, "AESE", "0100111000101000010010nnnnnddddd") -INST(AESD, "AESD", "0100111000101000010110nnnnnddddd") -INST(AESMC, "AESMC", "0100111000101000011010nnnnnddddd") -INST(AESIMC, "AESIMC", "0100111000101000011110nnnnnddddd") - -// Data Processing - FP and SIMD - SHA -INST(SHA1C, "SHA1C", "01011110000mmmmm000000nnnnnddddd") -INST(SHA1P, "SHA1P", "01011110000mmmmm000100nnnnnddddd") -INST(SHA1M, "SHA1M", "01011110000mmmmm001000nnnnnddddd") -INST(SHA1SU0, "SHA1SU0", "01011110000mmmmm001100nnnnnddddd") -INST(SHA256H, "SHA256H", "01011110000mmmmm010000nnnnnddddd") -INST(SHA256H2, "SHA256H2", "01011110000mmmmm010100nnnnnddddd") -INST(SHA256SU1, "SHA256SU1", "01011110000mmmmm011000nnnnnddddd") -INST(SHA1H, "SHA1H", "0101111000101000000010nnnnnddddd") -INST(SHA1SU1, "SHA1SU1", "0101111000101000000110nnnnnddddd") -INST(SHA256SU0, "SHA256SU0", "0101111000101000001010nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar copy -INST(DUP_elt_1, "DUP (element)", "01011110000iiiii000001nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar three -//INST(FMULX_vec_1, "FMULX", "01011110010mmmmm000111nnnnnddddd") -INST(FMULX_vec_2, "FMULX", "010111100z1mmmmm110111nnnnnddddd") -INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd") -INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd") -INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd") -INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd") -INST(FRSQRTS_1, "FRSQRTS", "01011110110mmmmm001111nnnnnddddd") -INST(FRSQRTS_2, "FRSQRTS", "010111101z1mmmmm111111nnnnnddddd") -//INST(FCMGE_reg_1, "FCMGE (register)", "01111110010mmmmm001001nnnnnddddd") -INST(FCMGE_reg_2, "FCMGE (register)", "011111100z1mmmmm111001nnnnnddddd") -//INST(FACGE_1, "FACGE", "01111110010mmmmm001011nnnnnddddd") -INST(FACGE_2, "FACGE", "011111100z1mmmmm111011nnnnnddddd") -//INST(FABD_1, "FABD", "01111110110mmmmm000101nnnnnddddd") -INST(FABD_2, "FABD", "011111101z1mmmmm110101nnnnnddddd") -//INST(FCMGT_reg_1, "FCMGT (register)", "01111110110mmmmm001001nnnnnddddd") -INST(FCMGT_reg_2, "FCMGT (register)", "011111101z1mmmmm111001nnnnnddddd") -//INST(FACGT_1, "FACGT", "01111110110mmmmm001011nnnnnddddd") -INST(FACGT_2, "FACGT", "011111101z1mmmmm111011nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar two register misc -//INST(FCVTNS_1, "FCVTNS (vector)", "0101111001111001101010nnnnnddddd") -INST(FCVTNS_2, "FCVTNS (vector)", "010111100z100001101010nnnnnddddd") -//INST(FCVTMS_1, "FCVTMS (vector)", "0101111001111001101110nnnnnddddd") -INST(FCVTMS_2, "FCVTMS (vector)", "010111100z100001101110nnnnnddddd") -//INST(FCVTAS_1, "FCVTAS (vector)", "0101111001111001110010nnnnnddddd") -INST(FCVTAS_2, "FCVTAS (vector)", "010111100z100001110010nnnnnddddd") -//INST(SCVTF_int_1, "SCVTF (vector, integer)", "0101111001111001110110nnnnnddddd") -INST(SCVTF_int_2, "SCVTF (vector, integer)", "010111100z100001110110nnnnnddddd") -//INST(FCMGT_zero_1, "FCMGT (zero)", "0101111011111000110010nnnnnddddd") -INST(FCMGT_zero_2, "FCMGT (zero)", "010111101z100000110010nnnnnddddd") -INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd") -INST(FCMEQ_zero_2, "FCMEQ (zero)", "010111101z100000110110nnnnnddddd") -//INST(FCMLT_1, "FCMLT (zero)", "0101111011111000111010nnnnnddddd") -INST(FCMLT_2, "FCMLT (zero)", "010111101z100000111010nnnnnddddd") -//INST(FCVTPS_1, "FCVTPS (vector)", "0101111011111001101010nnnnnddddd") -INST(FCVTPS_2, "FCVTPS (vector)", "010111101z100001101010nnnnnddddd") -//INST(FCVTZS_int_1, "FCVTZS (vector, integer)", "0101111011111001101110nnnnnddddd") -INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd") -INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") -INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") -INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") -INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") -//INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd") -INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") -//INST(FCVTMU_1, "FCVTMU (vector)", "0111111001111001101110nnnnnddddd") -INST(FCVTMU_2, "FCVTMU (vector)", "011111100z100001101110nnnnnddddd") -//INST(FCVTAU_1, "FCVTAU (vector)", "0111111001111001110010nnnnnddddd") -INST(FCVTAU_2, "FCVTAU (vector)", "011111100z100001110010nnnnnddddd") -//INST(UCVTF_int_1, "UCVTF (vector, integer)", "0111111001111001110110nnnnnddddd") -INST(UCVTF_int_2, "UCVTF (vector, integer)", "011111100z100001110110nnnnnddddd") -//INST(FCMGE_zero_1, "FCMGE (zero)", "0111111011111000110010nnnnnddddd") -INST(FCMGE_zero_2, "FCMGE (zero)", "011111101z100000110010nnnnnddddd") -//INST(FCMLE_1, "FCMLE (zero)", "0111111011111000110110nnnnnddddd") -INST(FCMLE_2, "FCMLE (zero)", "011111101z100000110110nnnnnddddd") -//INST(FCVTPU_1, "FCVTPU (vector)", "0111111011111001101010nnnnnddddd") -INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd") -//INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd") -INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd") -INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") -INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar three same extra -//INST(SQRDMLAH_vec_1, "SQRDMLAH (vector)", "01111110zz0mmmmm100001nnnnnddddd") -//INST(SQRDMLAH_vec_2, "SQRDMLAH (vector)", "0Q101110zz0mmmmm100001nnnnnddddd") -//INST(SQRDMLSH_vec_1, "SQRDMLSH (vector)", "01111110zz0mmmmm100011nnnnnddddd") -//INST(SQRDMLSH_vec_2, "SQRDMLSH (vector)", "0Q101110zz0mmmmm100011nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar two-register misc -INST(SUQADD_1, "SUQADD", "01011110zz100000001110nnnnnddddd") -INST(SQABS_1, "SQABS", "01011110zz100000011110nnnnnddddd") -INST(CMGT_zero_1, "CMGT (zero)", "01011110zz100000100010nnnnnddddd") -INST(CMEQ_zero_1, "CMEQ (zero)", "01011110zz100000100110nnnnnddddd") -INST(CMLT_1, "CMLT (zero)", "01011110zz100000101010nnnnnddddd") -INST(ABS_1, "ABS", "01011110zz100000101110nnnnnddddd") -INST(SQXTN_1, "SQXTN, SQXTN2", "01011110zz100001010010nnnnnddddd") -INST(USQADD_1, "USQADD", "01111110zz100000001110nnnnnddddd") -INST(SQNEG_1, "SQNEG", "01111110zz100000011110nnnnnddddd") -INST(CMGE_zero_1, "CMGE (zero)", "01111110zz100000100010nnnnnddddd") -INST(CMLE_1, "CMLE (zero)", "01111110zz100000100110nnnnnddddd") -INST(NEG_1, "NEG (vector)", "01111110zz100000101110nnnnnddddd") -INST(SQXTUN_1, "SQXTUN, SQXTUN2", "01111110zz100001001010nnnnnddddd") -INST(UQXTN_1, "UQXTN, UQXTN2", "01111110zz100001010010nnnnnddddd") -INST(FCVTXN_1, "FCVTXN, FCVTXN2", "011111100z100001011010nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar pairwise -INST(ADDP_pair, "ADDP (scalar)", "01011110zz110001101110nnnnnddddd") -//INST(FMAXNMP_pair_1, "FMAXNMP (scalar)", "0101111000110000110010nnnnnddddd") -INST(FMAXNMP_pair_2, "FMAXNMP (scalar)", "011111100z110000110010nnnnnddddd") -//INST(FADDP_pair_1, "FADDP (scalar)", "0101111000110000110110nnnnnddddd") -INST(FADDP_pair_2, "FADDP (scalar)", "011111100z110000110110nnnnnddddd") -//INST(FMAXP_pair_1, "FMAXP (scalar)", "0101111000110000111110nnnnnddddd") -INST(FMAXP_pair_2, "FMAXP (scalar)", "011111100z110000111110nnnnnddddd") -//INST(FMINNMP_pair_1, "FMINNMP (scalar)", "0101111010110000110010nnnnnddddd") -INST(FMINNMP_pair_2, "FMINNMP (scalar)", "011111101z110000110010nnnnnddddd") -//INST(FMINP_pair_1, "FMINP (scalar)", "0101111010110000111110nnnnnddddd") -INST(FMINP_pair_2, "FMINP (scalar)", "011111101z110000111110nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar three different -//INST(SQDMLAL_vec_1, "SQDMLAL, SQDMLAL2 (vector)", "01011110zz1mmmmm100100nnnnnddddd") -//INST(SQDMLSL_vec_1, "SQDMLSL, SQDMLSL2 (vector)", "01011110zz1mmmmm101100nnnnnddddd") -//INST(SQDMULL_vec_1, "SQDMULL, SQDMULL2 (vector)", "01011110zz1mmmmm110100nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar three same -INST(SQADD_1, "SQADD", "01011110zz1mmmmm000011nnnnnddddd") -INST(SQSUB_1, "SQSUB", "01011110zz1mmmmm001011nnnnnddddd") -INST(CMGT_reg_1, "CMGT (register)", "01011110zz1mmmmm001101nnnnnddddd") -INST(CMGE_reg_1, "CMGE (register)", "01011110zz1mmmmm001111nnnnnddddd") -INST(SSHL_1, "SSHL", "01011110zz1mmmmm010001nnnnnddddd") -INST(SQSHL_reg_1, "SQSHL (register)", "01011110zz1mmmmm010011nnnnnddddd") -INST(SRSHL_1, "SRSHL", "01011110zz1mmmmm010101nnnnnddddd") -//INST(SQRSHL_1, "SQRSHL", "01011110zz1mmmmm010111nnnnnddddd") -INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd") -INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd") -INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd") -INST(UQADD_1, "UQADD", "01111110zz1mmmmm000011nnnnnddddd") -INST(UQSUB_1, "UQSUB", "01111110zz1mmmmm001011nnnnnddddd") -INST(CMHI_1, "CMHI (register)", "01111110zz1mmmmm001101nnnnnddddd") -INST(CMHS_1, "CMHS (register)", "01111110zz1mmmmm001111nnnnnddddd") -INST(USHL_1, "USHL", "01111110zz1mmmmm010001nnnnnddddd") -INST(UQSHL_reg_1, "UQSHL (register)", "01111110zz1mmmmm010011nnnnnddddd") -INST(URSHL_1, "URSHL", "01111110zz1mmmmm010101nnnnnddddd") -//INST(UQRSHL_1, "UQRSHL", "01111110zz1mmmmm010111nnnnnddddd") -INST(SUB_1, "SUB (vector)", "01111110zz1mmmmm100001nnnnnddddd") -INST(CMEQ_reg_1, "CMEQ (register)", "01111110zz1mmmmm100011nnnnnddddd") -INST(SQRDMULH_vec_1, "SQRDMULH (vector)", "01111110zz1mmmmm101101nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar shift by immediate -INST(SSHR_1, "SSHR", "010111110IIIIiii000001nnnnnddddd") -INST(SSRA_1, "SSRA", "010111110IIIIiii000101nnnnnddddd") -INST(SRSHR_1, "SRSHR", "010111110IIIIiii001001nnnnnddddd") -INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd") -INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd") -INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd") -INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd") -//INST(SQRSHRN_1, "SQRSHRN, SQRSHRN2", "010111110IIIIiii100111nnnnnddddd") -INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd") -INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd") -INST(USHR_1, "USHR", "011111110IIIIiii000001nnnnnddddd") -INST(USRA_1, "USRA", "011111110IIIIiii000101nnnnnddddd") -INST(URSHR_1, "URSHR", "011111110IIIIiii001001nnnnnddddd") -INST(URSRA_1, "URSRA", "011111110IIIIiii001101nnnnnddddd") -INST(SRI_1, "SRI", "011111110IIIIiii010001nnnnnddddd") -INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd") -INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd") -INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd") -INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd") -//INST(SQRSHRUN_1, "SQRSHRUN, SQRSHRUN2", "011111110IIIIiii100011nnnnnddddd") -INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd") -//INST(UQRSHRN_1, "UQRSHRN, UQRSHRN2", "011111110IIIIiii100111nnnnnddddd") -INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd") -INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar x indexed element -//INST(SQDMLAL_elt_1, "SQDMLAL, SQDMLAL2 (by element)", "01011111zzLMmmmm0011H0nnnnnddddd") -//INST(SQDMLSL_elt_1, "SQDMLSL, SQDMLSL2 (by element)", "01011111zzLMmmmm0111H0nnnnnddddd") -INST(SQDMULL_elt_1, "SQDMULL, SQDMULL2 (by element)", "01011111zzLMmmmm1011H0nnnnnddddd") -INST(SQDMULH_elt_1, "SQDMULH (by element)", "01011111zzLMmmmm1100H0nnnnnddddd") -INST(SQRDMULH_elt_1, "SQRDMULH (by element)", "01011111zzLMmmmm1101H0nnnnnddddd") -INST(FMLA_elt_1, "FMLA (by element)", "0101111100LMmmmm0001H0nnnnnddddd") -INST(FMLA_elt_2, "FMLA (by element)", "010111111zLMmmmm0001H0nnnnnddddd") -INST(FMLS_elt_1, "FMLS (by element)", "0101111100LMmmmm0101H0nnnnnddddd") -INST(FMLS_elt_2, "FMLS (by element)", "010111111zLMmmmm0101H0nnnnnddddd") -//INST(FMUL_elt_1, "FMUL (by element)", "0101111100LMmmmm1001H0nnnnnddddd") -INST(FMUL_elt_2, "FMUL (by element)", "010111111zLMmmmm1001H0nnnnnddddd") -//INST(SQRDMLAH_elt_1, "SQRDMLAH (by element)", "01111111zzLMmmmm1101H0nnnnnddddd") -//INST(SQRDMLSH_elt_1, "SQRDMLSH (by element)", "01111111zzLMmmmm1111H0nnnnnddddd") -//INST(FMULX_elt_1, "FMULX (by element)", "0111111100LMmmmm1001H0nnnnnddddd") -INST(FMULX_elt_2, "FMULX (by element)", "011111111zLMmmmm1001H0nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Table Lookup -INST(TBL, "TBL", "0Q001110000mmmmm0LL000nnnnnddddd") -INST(TBX, "TBX", "0Q001110000mmmmm0LL100nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Permute -INST(UZP1, "UZP1", "0Q001110zz0mmmmm000110nnnnnddddd") -INST(TRN1, "TRN1", "0Q001110zz0mmmmm001010nnnnnddddd") -INST(ZIP1, "ZIP1", "0Q001110zz0mmmmm001110nnnnnddddd") -INST(UZP2, "UZP2", "0Q001110zz0mmmmm010110nnnnnddddd") -INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd") -INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Extract -INST(EXT, "EXT", "0Q101110000mmmmm0iiii0nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Copy -INST(DUP_elt_2, "DUP (element)", "0Q001110000iiiii000001nnnnnddddd") -INST(DUP_gen, "DUP (general)", "0Q001110000iiiii000011nnnnnddddd") -INST(SMOV, "SMOV", "0Q001110000iiiii001011nnnnnddddd") -INST(UMOV, "UMOV", "0Q001110000iiiii001111nnnnnddddd") -INST(INS_gen, "INS (general)", "01001110000iiiii000111nnnnnddddd") -INST(INS_elt, "INS (element)", "01101110000iiiii0iiii1nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Three same -//INST(FMULX_vec_3, "FMULX", "0Q001110010mmmmm000111nnnnnddddd") -INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd") -INST(FRECPS_3, "FRECPS", "0Q001110010mmmmm001111nnnnnddddd") -INST(FRSQRTS_3, "FRSQRTS", "0Q001110110mmmmm001111nnnnnddddd") -//INST(FCMGE_reg_3, "FCMGE (register)", "0Q101110010mmmmm001001nnnnnddddd") -//INST(FACGE_3, "FACGE", "0Q101110010mmmmm001011nnnnnddddd") -//INST(FABD_3, "FABD", "0Q101110110mmmmm000101nnnnnddddd") -//INST(FCMGT_reg_3, "FCMGT (register)", "0Q101110110mmmmm001001nnnnnddddd") -//INST(FACGT_3, "FACGT", "0Q101110110mmmmm001011nnnnnddddd") -//INST(FMAXNM_1, "FMAXNM (vector)", "0Q001110010mmmmm000001nnnnnddddd") -INST(FMLA_vec_1, "FMLA (vector)", "0Q001110010mmmmm000011nnnnnddddd") -//INST(FADD_1, "FADD (vector)", "0Q001110010mmmmm000101nnnnnddddd") -//INST(FMAX_1, "FMAX (vector)", "0Q001110010mmmmm001101nnnnnddddd") -//INST(FMINNM_1, "FMINNM (vector)", "0Q001110110mmmmm000001nnnnnddddd") -INST(FMLS_vec_1, "FMLS (vector)", "0Q001110110mmmmm000011nnnnnddddd") -//INST(FSUB_1, "FSUB (vector)", "0Q001110110mmmmm000101nnnnnddddd") -//INST(FMIN_1, "FMIN (vector)", "0Q001110110mmmmm001101nnnnnddddd") -//INST(FMAXNMP_vec_1, "FMAXNMP (vector)", "0Q101110010mmmmm000001nnnnnddddd") -//INST(FADDP_vec_1, "FADDP (vector)", "0Q101110010mmmmm000101nnnnnddddd") -//INST(FMUL_vec_1, "FMUL (vector)", "0Q101110010mmmmm000111nnnnnddddd") -//INST(FMAXP_vec_1, "FMAXP (vector)", "0Q101110010mmmmm001101nnnnnddddd") -//INST(FDIV_1, "FDIV (vector)", "0Q101110010mmmmm001111nnnnnddddd") -//INST(FMINNMP_vec_1, "FMINNMP (vector)", "0Q101110110mmmmm000001nnnnnddddd") -//INST(FMINP_vec_1, "FMINP (vector)", "0Q101110110mmmmm001101nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Three same extra -//INST(SMMLA_vec, "SMMLA", "01001110100mmmmm101001nnnnnddddd") // v8.6 -//INST(UMMLA_vec, "UMMLA", "01101110100mmmmm101001nnnnnddddd") // v8.6 -//INST(USMMLA_vec, "USMMLA", "01001110100mmmmm101011nnnnnddddd") // v8.6 -//INST(SUDOT_element, "SUDOT (by element)", "0Q00111100LMmmmm1111H0nnnnnddddd") // v8.6 -//INST(USDOT_element, "USDOT (by_element)", "0Q00111110LMmmmm1111H0nnnnnddddd") // v8.6 -//INST(USDOT_vec, "USDOT (vector)", "0Q001110100mmmmm100111nnnnnddddd") // v8.6 -INST(SDOT_vec, "SDOT (vector)", "0Q001110zz0mmmmm100101nnnnnddddd") -INST(UDOT_vec, "UDOT (vector)", "0Q101110zz0mmmmm100101nnnnnddddd") -INST(FCMLA_vec, "FCMLA", "0Q101110zz0mmmmm110rr1nnnnnddddd") -INST(FCADD_vec, "FCADD", "0Q101110zz0mmmmm111r01nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Two-register misc -INST(REV64_asimd, "REV64", "0Q001110zz100000000010nnnnnddddd") -INST(REV16_asimd, "REV16 (vector)", "0Q001110zz100000000110nnnnnddddd") -INST(SADDLP, "SADDLP", "0Q001110zz100000001010nnnnnddddd") -INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd") -INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd") -INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd") -INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd") -INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd") -INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd") -INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd") -INST(CMLT_2, "CMLT (zero)", "0Q001110zz100000101010nnnnnddddd") -INST(ABS_2, "ABS", "0Q001110zz100000101110nnnnnddddd") -INST(XTN, "XTN, XTN2", "0Q001110zz100001001010nnnnnddddd") -INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd") -INST(FCVTN, "FCVTN, FCVTN2", "0Q0011100z100001011010nnnnnddddd") -INST(FCVTL, "FCVTL, FCVTL2", "0Q0011100z100001011110nnnnnddddd") -INST(FRINTN_1, "FRINTN (vector)", "0Q00111001111001100010nnnnnddddd") -INST(FRINTN_2, "FRINTN (vector)", "0Q0011100z100001100010nnnnnddddd") -INST(FRINTM_1, "FRINTM (vector)", "0Q00111001111001100110nnnnnddddd") -INST(FRINTM_2, "FRINTM (vector)", "0Q0011100z100001100110nnnnnddddd") -//INST(FCVTNS_3, "FCVTNS (vector)", "0Q00111001111001101010nnnnnddddd") -INST(FCVTNS_4, "FCVTNS (vector)", "0Q0011100z100001101010nnnnnddddd") -//INST(FCVTMS_3, "FCVTMS (vector)", "0Q00111001111001101110nnnnnddddd") -INST(FCVTMS_4, "FCVTMS (vector)", "0Q0011100z100001101110nnnnnddddd") -//INST(FCVTAS_3, "FCVTAS (vector)", "0Q00111001111001110010nnnnnddddd") -INST(FCVTAS_4, "FCVTAS (vector)", "0Q0011100z100001110010nnnnnddddd") -//INST(SCVTF_int_3, "SCVTF (vector, integer)", "0Q00111001111001110110nnnnnddddd") -INST(SCVTF_int_4, "SCVTF (vector, integer)", "0Q0011100z100001110110nnnnnddddd") -//INST(FCMGT_zero_3, "FCMGT (zero)", "0Q00111011111000110010nnnnnddddd") -INST(FCMGT_zero_4, "FCMGT (zero)", "0Q0011101z100000110010nnnnnddddd") -INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd") -INST(FCMEQ_zero_4, "FCMEQ (zero)", "0Q0011101z100000110110nnnnnddddd") -//INST(FCMLT_3, "FCMLT (zero)", "0Q00111011111000111010nnnnnddddd") -INST(FCMLT_4, "FCMLT (zero)", "0Q0011101z100000111010nnnnnddddd") -INST(FABS_1, "FABS (vector)", "0Q00111011111000111110nnnnnddddd") -INST(FABS_2, "FABS (vector)", "0Q0011101z100000111110nnnnnddddd") -INST(FRINTP_1, "FRINTP (vector)", "0Q00111011111001100010nnnnnddddd") -INST(FRINTP_2, "FRINTP (vector)", "0Q0011101z100001100010nnnnnddddd") -INST(FRINTZ_1, "FRINTZ (vector)", "0Q00111011111001100110nnnnnddddd") -INST(FRINTZ_2, "FRINTZ (vector)", "0Q0011101z100001100110nnnnnddddd") -//INST(FCVTPS_3, "FCVTPS (vector)", "0Q00111011111001101010nnnnnddddd") -INST(FCVTPS_4, "FCVTPS (vector)", "0Q0011101z100001101010nnnnnddddd") -//INST(FCVTZS_int_3, "FCVTZS (vector, integer)", "0Q00111011111001101110nnnnnddddd") -INST(FCVTZS_int_4, "FCVTZS (vector, integer)", "0Q0011101z100001101110nnnnnddddd") -INST(URECPE, "URECPE", "0Q0011101z100001110010nnnnnddddd") -INST(FRECPE_3, "FRECPE", "0Q00111011111001110110nnnnnddddd") -INST(FRECPE_4, "FRECPE", "0Q0011101z100001110110nnnnnddddd") -INST(REV32_asimd, "REV32 (vector)", "0Q101110zz100000000010nnnnnddddd") -INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd") -INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd") -INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd") -INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd") -INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd") -INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd") -INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd") -INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd") -INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd") -INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd") -INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd") -INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd") -INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd") -INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd") -INST(FRINTX_1, "FRINTX (vector)", "0Q10111001111001100110nnnnnddddd") -INST(FRINTX_2, "FRINTX (vector)", "0Q1011100z100001100110nnnnnddddd") -//INST(FCVTNU_3, "FCVTNU (vector)", "0Q10111001111001101010nnnnnddddd") -INST(FCVTNU_4, "FCVTNU (vector)", "0Q1011100z100001101010nnnnnddddd") -//INST(FCVTMU_3, "FCVTMU (vector)", "0Q10111001111001101110nnnnnddddd") -INST(FCVTMU_4, "FCVTMU (vector)", "0Q1011100z100001101110nnnnnddddd") -//INST(FCVTAU_3, "FCVTAU (vector)", "0Q10111001111001110010nnnnnddddd") -INST(FCVTAU_4, "FCVTAU (vector)", "0Q1011100z100001110010nnnnnddddd") -//INST(UCVTF_int_3, "UCVTF (vector, integer)", "0Q10111001111001110110nnnnnddddd") -INST(UCVTF_int_4, "UCVTF (vector, integer)", "0Q1011100z100001110110nnnnnddddd") -INST(NOT, "NOT", "0Q10111000100000010110nnnnnddddd") -INST(RBIT_asimd, "RBIT (vector)", "0Q10111001100000010110nnnnnddddd") -INST(FNEG_1, "FNEG (vector)", "0Q10111011111000111110nnnnnddddd") -INST(FNEG_2, "FNEG (vector)", "0Q1011101z100000111110nnnnnddddd") -INST(FRINTI_1, "FRINTI (vector)", "0Q10111011111001100110nnnnnddddd") -INST(FRINTI_2, "FRINTI (vector)", "0Q1011101z100001100110nnnnnddddd") -//INST(FCMGE_zero_3, "FCMGE (zero)", "0Q10111011111000110010nnnnnddddd") -INST(FCMGE_zero_4, "FCMGE (zero)", "0Q1011101z100000110010nnnnnddddd") -//INST(FCMLE_3, "FCMLE (zero)", "0Q10111011111000110110nnnnnddddd") -INST(FCMLE_4, "FCMLE (zero)", "0Q1011101z100000110110nnnnnddddd") -//INST(FCVTPU_3, "FCVTPU (vector)", "0Q10111011111001101010nnnnnddddd") -INST(FCVTPU_4, "FCVTPU (vector)", "0Q1011101z100001101010nnnnnddddd") -//INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd") -INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") -INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") -INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") -INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") -//INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd") -INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") -//INST(FRINT32X_1, "FRINT32X (vector)", "0Q1011100z100001111110nnnnnddddd") // ARMv8.5 -//INST(FRINT64X_1, "FRINT64X (vector)", "0Q1011100z100001111010nnnnnddddd") // ARMv8.5 -//INST(FRINT32Z_1, "FRINT32Z (vector)", "0Q0011100z100001111010nnnnnddddd") // ARMv8.5 -//INST(FRINT64Z_1, "FRINT64Z (vector)", "0Q0011100z100001111110nnnnnddddd") // ARMv8.5 - -// Data Processing - FP and SIMD - SIMD across lanes -INST(SADDLV, "SADDLV", "0Q001110zz110000001110nnnnnddddd") -INST(SMAXV, "SMAXV", "0Q001110zz110000101010nnnnnddddd") -INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") -INST(ADDV, "ADDV", "0Q001110zz110001101110nnnnnddddd") -//INST(FMAXNMV_1, "FMAXNMV", "0Q00111000110000110010nnnnnddddd") -INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") -//INST(FMAXV_1, "FMAXV", "0Q00111000110000111110nnnnnddddd") -INST(FMAXV_2, "FMAXV", "0Q1011100z110000111110nnnnnddddd") -//INST(FMINNMV_1, "FMINNMV", "0Q00111010110000110010nnnnnddddd") -INST(FMINNMV_2, "FMINNMV", "0Q1011101z110000110010nnnnnddddd") -//INST(FMINV_1, "FMINV", "0Q00111010110000111110nnnnnddddd") -INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") -INST(UADDLV, "UADDLV", "0Q101110zz110000001110nnnnnddddd") -INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") -INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD three different -INST(SADDL, "SADDL, SADDL2", "0Q001110zz1mmmmm000000nnnnnddddd") -INST(SADDW, "SADDW, SADDW2", "0Q001110zz1mmmmm000100nnnnnddddd") -INST(SSUBL, "SSUBL, SSUBL2", "0Q001110zz1mmmmm001000nnnnnddddd") -INST(SSUBW, "SSUBW, SSUBW2", "0Q001110zz1mmmmm001100nnnnnddddd") -INST(ADDHN, "ADDHN, ADDHN2", "0Q001110zz1mmmmm010000nnnnnddddd") -INST(SABAL, "SABAL, SABAL2", "0Q001110zz1mmmmm010100nnnnnddddd") -INST(SUBHN, "SUBHN, SUBHN2", "0Q001110zz1mmmmm011000nnnnnddddd") -INST(SABDL, "SABDL, SABDL2", "0Q001110zz1mmmmm011100nnnnnddddd") -INST(SMLAL_vec, "SMLAL, SMLAL2 (vector)", "0Q001110zz1mmmmm100000nnnnnddddd") -INST(SMLSL_vec, "SMLSL, SMLSL2 (vector)", "0Q001110zz1mmmmm101000nnnnnddddd") -INST(SMULL_vec, "SMULL, SMULL2 (vector)", "0Q001110zz1mmmmm110000nnnnnddddd") -INST(PMULL, "PMULL, PMULL2", "0Q001110zz1mmmmm111000nnnnnddddd") -INST(UADDL, "UADDL, UADDL2", "0Q101110zz1mmmmm000000nnnnnddddd") -INST(UADDW, "UADDW, UADDW2", "0Q101110zz1mmmmm000100nnnnnddddd") -INST(USUBL, "USUBL, USUBL2", "0Q101110zz1mmmmm001000nnnnnddddd") -INST(USUBW, "USUBW, USUBW2", "0Q101110zz1mmmmm001100nnnnnddddd") -INST(RADDHN, "RADDHN, RADDHN2", "0Q101110zz1mmmmm010000nnnnnddddd") -INST(UABAL, "UABAL, UABAL2", "0Q101110zz1mmmmm010100nnnnnddddd") -INST(RSUBHN, "RSUBHN, RSUBHN2", "0Q101110zz1mmmmm011000nnnnnddddd") -INST(UABDL, "UABDL, UABDL2", "0Q101110zz1mmmmm011100nnnnnddddd") -INST(UMLAL_vec, "UMLAL, UMLAL2 (vector)", "0Q101110zz1mmmmm100000nnnnnddddd") -INST(UMLSL_vec, "UMLSL, UMLSL2 (vector)", "0Q101110zz1mmmmm101000nnnnnddddd") -INST(UMULL_vec, "UMULL, UMULL2 (vector)", "0Q101110zz1mmmmm110000nnnnnddddd") -//INST(SQDMLAL_vec_2, "SQDMLAL, SQDMLAL2 (vector)", "0Q001110zz1mmmmm100100nnnnnddddd") -//INST(SQDMLSL_vec_2, "SQDMLSL, SQDMLSL2 (vector)", "0Q001110zz1mmmmm101100nnnnnddddd") -INST(SQDMULL_vec_2, "SQDMULL, SQDMULL2 (vector)", "0Q001110zz1mmmmm110100nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD three same -INST(SHADD, "SHADD", "0Q001110zz1mmmmm000001nnnnnddddd") -INST(SQADD_2, "SQADD", "0Q001110zz1mmmmm000011nnnnnddddd") -INST(SRHADD, "SRHADD", "0Q001110zz1mmmmm000101nnnnnddddd") -INST(SHSUB, "SHSUB", "0Q001110zz1mmmmm001001nnnnnddddd") -INST(SQSUB_2, "SQSUB", "0Q001110zz1mmmmm001011nnnnnddddd") -INST(CMGT_reg_2, "CMGT (register)", "0Q001110zz1mmmmm001101nnnnnddddd") -INST(CMGE_reg_2, "CMGE (register)", "0Q001110zz1mmmmm001111nnnnnddddd") -INST(SSHL_2, "SSHL", "0Q001110zz1mmmmm010001nnnnnddddd") -INST(SQSHL_reg_2, "SQSHL (register)", "0Q001110zz1mmmmm010011nnnnnddddd") -INST(SRSHL_2, "SRSHL", "0Q001110zz1mmmmm010101nnnnnddddd") -//INST(SQRSHL_2, "SQRSHL", "0Q001110zz1mmmmm010111nnnnnddddd") -INST(SMAX, "SMAX", "0Q001110zz1mmmmm011001nnnnnddddd") -INST(SMIN, "SMIN", "0Q001110zz1mmmmm011011nnnnnddddd") -INST(SABD, "SABD", "0Q001110zz1mmmmm011101nnnnnddddd") -INST(SABA, "SABA", "0Q001110zz1mmmmm011111nnnnnddddd") -INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd") -INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd") -INST(MLA_vec, "MLA (vector)", "0Q001110zz1mmmmm100101nnnnnddddd") -INST(MUL_vec, "MUL (vector)", "0Q001110zz1mmmmm100111nnnnnddddd") -INST(SMAXP, "SMAXP", "0Q001110zz1mmmmm101001nnnnnddddd") -INST(SMINP, "SMINP", "0Q001110zz1mmmmm101011nnnnnddddd") -INST(SQDMULH_vec_2, "SQDMULH (vector)", "0Q001110zz1mmmmm101101nnnnnddddd") -INST(ADDP_vec, "ADDP (vector)", "0Q001110zz1mmmmm101111nnnnnddddd") -INST(FMAXNM_2, "FMAXNM (vector)", "0Q0011100z1mmmmm110001nnnnnddddd") -INST(FMLA_vec_2, "FMLA (vector)", "0Q0011100z1mmmmm110011nnnnnddddd") -INST(FADD_2, "FADD (vector)", "0Q0011100z1mmmmm110101nnnnnddddd") -INST(FMAX_2, "FMAX (vector)", "0Q0011100z1mmmmm111101nnnnnddddd") -INST(FMULX_vec_4, "FMULX", "0Q0011100z1mmmmm110111nnnnnddddd") -INST(FCMEQ_reg_4, "FCMEQ (register)", "0Q0011100z1mmmmm111001nnnnnddddd") -//INST(FMLAL_vec_1, "FMLAL, FMLAL2 (vector)", "0Q0011100z1mmmmm111011nnnnnddddd") -INST(FRECPS_4, "FRECPS", "0Q0011100z1mmmmm111111nnnnnddddd") -INST(AND_asimd, "AND (vector)", "0Q001110001mmmmm000111nnnnnddddd") -INST(BIC_asimd_reg, "BIC (vector, register)", "0Q001110011mmmmm000111nnnnnddddd") -INST(FMINNM_2, "FMINNM (vector)", "0Q0011101z1mmmmm110001nnnnnddddd") -INST(FMLS_vec_2, "FMLS (vector)", "0Q0011101z1mmmmm110011nnnnnddddd") -INST(FSUB_2, "FSUB (vector)", "0Q0011101z1mmmmm110101nnnnnddddd") -//INST(FMLSL_vec_1, "FMLSL, FMLSL2 (vector)", "0Q0011101z1mmmmm111011nnnnnddddd") -INST(FMIN_2, "FMIN (vector)", "0Q0011101z1mmmmm111101nnnnnddddd") -INST(FRSQRTS_4, "FRSQRTS", "0Q0011101z1mmmmm111111nnnnnddddd") -INST(ORR_asimd_reg, "ORR (vector, register)", "0Q001110101mmmmm000111nnnnnddddd") -INST(ORN_asimd, "ORN (vector)", "0Q001110111mmmmm000111nnnnnddddd") -INST(UHADD, "UHADD", "0Q101110zz1mmmmm000001nnnnnddddd") -INST(UQADD_2, "UQADD", "0Q101110zz1mmmmm000011nnnnnddddd") -INST(URHADD, "URHADD", "0Q101110zz1mmmmm000101nnnnnddddd") -INST(UHSUB, "UHSUB", "0Q101110zz1mmmmm001001nnnnnddddd") -INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd") -INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd") -INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd") -INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd") -INST(UQSHL_reg_2, "UQSHL (register)", "0Q101110zz1mmmmm010011nnnnnddddd") -INST(URSHL_2, "URSHL", "0Q101110zz1mmmmm010101nnnnnddddd") -//INST(UQRSHL_2, "UQRSHL", "0Q101110zz1mmmmm010111nnnnnddddd") -INST(UMAX, "UMAX", "0Q101110zz1mmmmm011001nnnnnddddd") -INST(UMIN, "UMIN", "0Q101110zz1mmmmm011011nnnnnddddd") -INST(UABD, "UABD", "0Q101110zz1mmmmm011101nnnnnddddd") -INST(UABA, "UABA", "0Q101110zz1mmmmm011111nnnnnddddd") -INST(SUB_2, "SUB (vector)", "0Q101110zz1mmmmm100001nnnnnddddd") -INST(CMEQ_reg_2, "CMEQ (register)", "0Q101110zz1mmmmm100011nnnnnddddd") -INST(MLS_vec, "MLS (vector)", "0Q101110zz1mmmmm100101nnnnnddddd") -INST(PMUL, "PMUL", "0Q101110zz1mmmmm100111nnnnnddddd") -INST(UMAXP, "UMAXP", "0Q101110zz1mmmmm101001nnnnnddddd") -INST(UMINP, "UMINP", "0Q101110zz1mmmmm101011nnnnnddddd") -INST(SQRDMULH_vec_2, "SQRDMULH (vector)", "0Q101110zz1mmmmm101101nnnnnddddd") -INST(FMAXNMP_vec_2, "FMAXNMP (vector)", "0Q1011100z1mmmmm110001nnnnnddddd") -//INST(FMLAL_vec_2, "FMLAL, FMLAL2 (vector)", "0Q1011100z1mmmmm110011nnnnnddddd") -INST(FADDP_vec_2, "FADDP (vector)", "0Q1011100z1mmmmm110101nnnnnddddd") -INST(FMUL_vec_2, "FMUL (vector)", "0Q1011100z1mmmmm110111nnnnnddddd") -INST(FCMGE_reg_4, "FCMGE (register)", "0Q1011100z1mmmmm111001nnnnnddddd") -INST(FACGE_4, "FACGE", "0Q1011100z1mmmmm111011nnnnnddddd") -INST(FMAXP_vec_2, "FMAXP (vector)", "0Q1011100z1mmmmm111101nnnnnddddd") -INST(FDIV_2, "FDIV (vector)", "0Q1011100z1mmmmm111111nnnnnddddd") -INST(EOR_asimd, "EOR (vector)", "0Q101110001mmmmm000111nnnnnddddd") -INST(BSL, "BSL", "0Q101110011mmmmm000111nnnnnddddd") -INST(FMINNMP_vec_2, "FMINNMP (vector)", "0Q1011101z1mmmmm110001nnnnnddddd") -//INST(FMLSL_vec_2, "FMLSL, FMLSL2 (vector)", "0Q1011101z1mmmmm110011nnnnnddddd") -INST(FABD_4, "FABD", "0Q1011101z1mmmmm110101nnnnnddddd") -INST(FCMGT_reg_4, "FCMGT (register)", "0Q1011101z1mmmmm111001nnnnnddddd") -INST(FACGT_4, "FACGT", "0Q1011101z1mmmmm111011nnnnnddddd") -INST(FMINP_vec_2, "FMINP (vector)", "0Q1011101z1mmmmm111101nnnnnddddd") -INST(BIT, "BIT", "0Q101110101mmmmm000111nnnnnddddd") -INST(BIF, "BIF", "0Q101110111mmmmm000111nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD modified immediate -INST(MOVI, "MOVI, MVNI, ORR, BIC (vector, immediate)", "0Qo0111100000abcmmmm01defghddddd") -INST(FMOV_2, "FMOV (vector, immediate)", "0Qo0111100000abc111101defghddddd") -INST(FMOV_3, "FMOV (vector, immediate)", "0Q00111100000abc111111defghddddd") -INST(UnallocatedEncoding, "Unallocated SIMD modified immediate", "0--0111100000-------11----------") - -// Data Processing - FP and SIMD - SIMD Shift by immediate -INST(SSHR_2, "SSHR", "0Q0011110IIIIiii000001nnnnnddddd") -INST(SSRA_2, "SSRA", "0Q0011110IIIIiii000101nnnnnddddd") -INST(SRSHR_2, "SRSHR", "0Q0011110IIIIiii001001nnnnnddddd") -INST(SRSRA_2, "SRSRA", "0Q0011110IIIIiii001101nnnnnddddd") -INST(SHL_2, "SHL", "0Q0011110IIIIiii010101nnnnnddddd") -INST(SQSHL_imm_2, "SQSHL (immediate)", "0Q0011110IIIIiii011101nnnnnddddd") -INST(SHRN, "SHRN, SHRN2", "0Q0011110IIIIiii100001nnnnnddddd") -INST(RSHRN, "RSHRN, RSHRN2", "0Q0011110IIIIiii100011nnnnnddddd") -INST(SQSHRN_2, "SQSHRN, SQSHRN2", "0Q0011110IIIIiii100101nnnnnddddd") -INST(SQRSHRN_2, "SQRSHRN, SQRSHRN2", "0Q0011110IIIIiii100111nnnnnddddd") -INST(SSHLL, "SSHLL, SSHLL2", "0Q0011110IIIIiii101001nnnnnddddd") -INST(SCVTF_fix_2, "SCVTF (vector, fixed-point)", "0Q0011110IIIIiii111001nnnnnddddd") -INST(FCVTZS_fix_2, "FCVTZS (vector, fixed-point)", "0Q0011110IIIIiii111111nnnnnddddd") -INST(USHR_2, "USHR", "0Q1011110IIIIiii000001nnnnnddddd") -INST(USRA_2, "USRA", "0Q1011110IIIIiii000101nnnnnddddd") -INST(URSHR_2, "URSHR", "0Q1011110IIIIiii001001nnnnnddddd") -INST(URSRA_2, "URSRA", "0Q1011110IIIIiii001101nnnnnddddd") -INST(SRI_2, "SRI", "0Q1011110IIIIiii010001nnnnnddddd") -INST(SLI_2, "SLI", "0Q1011110IIIIiii010101nnnnnddddd") -INST(SQSHLU_2, "SQSHLU", "0Q1011110IIIIiii011001nnnnnddddd") -INST(UQSHL_imm_2, "UQSHL (immediate)", "0Q1011110IIIIiii011101nnnnnddddd") -INST(SQSHRUN_2, "SQSHRUN, SQSHRUN2", "0Q1011110IIIIiii100001nnnnnddddd") -INST(SQRSHRUN_2, "SQRSHRUN, SQRSHRUN2", "0Q1011110IIIIiii100011nnnnnddddd") -INST(UQSHRN_2, "UQSHRN, UQSHRN2", "0Q1011110IIIIiii100101nnnnnddddd") -INST(UQRSHRN_2, "UQRSHRN, UQRSHRN2", "0Q1011110IIIIiii100111nnnnnddddd") -INST(USHLL, "USHLL, USHLL2", "0Q1011110IIIIiii101001nnnnnddddd") -INST(UCVTF_fix_2, "UCVTF (vector, fixed-point)", "0Q1011110IIIIiii111001nnnnnddddd") -INST(FCVTZU_fix_2, "FCVTZU (vector, fixed-point)", "0Q1011110IIIIiii111111nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD vector x indexed element -INST(SMLAL_elt, "SMLAL, SMLAL2 (by element)", "0Q001111zzLMmmmm0010H0nnnnnddddd") -//INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd") -INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") -//INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd") -INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") -INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd") -INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd") -INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd") -INST(SQRDMULH_elt_2, "SQRDMULH (by element)", "0Q001111zzLMmmmm1101H0nnnnnddddd") -INST(SDOT_elt, "SDOT (by element)", "0Q001111zzLMmmmm1110H0nnnnnddddd") -INST(FMLA_elt_3, "FMLA (by element)", "0Q00111100LMmmmm0001H0nnnnnddddd") -INST(FMLA_elt_4, "FMLA (by element)", "0Q0011111zLMmmmm0001H0nnnnnddddd") -INST(FMLS_elt_3, "FMLS (by element)", "0Q00111100LMmmmm0101H0nnnnnddddd") -INST(FMLS_elt_4, "FMLS (by element)", "0Q0011111zLMmmmm0101H0nnnnnddddd") -//INST(FMUL_elt_3, "FMUL (by element)", "0Q00111100LMmmmm1001H0nnnnnddddd") -INST(FMUL_elt_4, "FMUL (by element)", "0Q0011111zLMmmmm1001H0nnnnnddddd") -//INST(FMLAL_elt_1, "FMLAL, FMLAL2 (by element)", "0Q0011111zLMmmmm0000H0nnnnnddddd") -//INST(FMLAL_elt_2, "FMLAL, FMLAL2 (by element)", "0Q1011111zLMmmmm1000H0nnnnnddddd") -//INST(FMLSL_elt_1, "FMLSL, FMLSL2 (by element)", "0Q0011111zLMmmmm0100H0nnnnnddddd") -//INST(FMLSL_elt_2, "FMLSL, FMLSL2 (by element)", "0Q1011111zLMmmmm1100H0nnnnnddddd") -INST(MLA_elt, "MLA (by element)", "0Q101111zzLMmmmm0000H0nnnnnddddd") -INST(UMLAL_elt, "UMLAL, UMLAL2 (by element)", "0Q101111zzLMmmmm0010H0nnnnnddddd") -INST(MLS_elt, "MLS (by element)", "0Q101111zzLMmmmm0100H0nnnnnddddd") -INST(UMLSL_elt, "UMLSL, UMLSL2 (by element)", "0Q101111zzLMmmmm0110H0nnnnnddddd") -INST(UMULL_elt, "UMULL, UMULL2 (by element)", "0Q101111zzLMmmmm1010H0nnnnnddddd") -//INST(SQRDMLAH_elt_2, "SQRDMLAH (by element)", "0Q101111zzLMmmmm1101H0nnnnnddddd") -INST(UDOT_elt, "UDOT (by element)", "0Q101111zzLMmmmm1110H0nnnnnddddd") -//INST(SQRDMLSH_elt_2, "SQRDMLSH (by element)", "0Q101111zzLMmmmm1111H0nnnnnddddd") -//INST(FMULX_elt_3, "FMULX (by element)", "0Q10111100LMmmmm1001H0nnnnnddddd") -INST(FMULX_elt_4, "FMULX (by element)", "0Q1011111zLMmmmm1001H0nnnnnddddd") -INST(FCMLA_elt, "FCMLA (by element)", "0Q101111zzLMmmmm0rr1H0nnnnnddddd") - -// Data Processing - FP and SIMD - Cryptographic three register -INST(SM3TT1A, "SM3TT1A", "11001110010mmmmm10ii00nnnnnddddd") -INST(SM3TT1B, "SM3TT1B", "11001110010mmmmm10ii01nnnnnddddd") -INST(SM3TT2A, "SM3TT2A", "11001110010mmmmm10ii10nnnnnddddd") -INST(SM3TT2B, "SM3TT2B", "11001110010mmmmm10ii11nnnnnddddd") - -// Data Processing - FP and SIMD - SHA512 three register -INST(SHA512H, "SHA512H", "11001110011mmmmm100000nnnnnddddd") -INST(SHA512H2, "SHA512H2", "11001110011mmmmm100001nnnnnddddd") -INST(SHA512SU1, "SHA512SU1", "11001110011mmmmm100010nnnnnddddd") -INST(RAX1, "RAX1", "11001110011mmmmm100011nnnnnddddd") -INST(SM3PARTW1, "SM3PARTW1", "11001110011mmmmm110000nnnnnddddd") -INST(SM3PARTW2, "SM3PARTW2", "11001110011mmmmm110001nnnnnddddd") -INST(SM4EKEY, "SM4EKEY", "11001110011mmmmm110010nnnnnddddd") -INST(XAR, "XAR", "11001110100mmmmmiiiiiinnnnnddddd") - -// Data Processing - FP and SIMD - Cryptographic four register -INST(EOR3, "EOR3", "11001110000mmmmm0aaaaannnnnddddd") -INST(BCAX, "BCAX", "11001110001mmmmm0aaaaannnnnddddd") -INST(SM3SS1, "SM3SS1", "11001110010mmmmm0aaaaannnnnddddd") - -// Data Processing - FP and SIMD - SHA512 two register -INST(SHA512SU0, "SHA512SU0", "1100111011000000100000nnnnnddddd") -INST(SM4E, "SM4E", "1100111011000000100001nnnnnddddd") - -// Data Processing - FP and SIMD - Conversion between floating point and fixed point -INST(SCVTF_float_fix, "SCVTF (scalar, fixed-point)", "z0011110yy000010ppppppnnnnnddddd") -INST(UCVTF_float_fix, "UCVTF (scalar, fixed-point)", "z0011110yy000011ppppppnnnnnddddd") -INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011110yy011000ppppppnnnnnddddd") -INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011110yy011001ppppppnnnnnddddd") - -// Data Processing - FP and SIMD - Conversion between floating point and integer -INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") -INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") -INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") -INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") -INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") -INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") -INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") -INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") -INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") -INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") -INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") -INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") -INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") -//INST(FJCVTZS, "FJCVTZS", "0001111001111110000000nnnnnddddd") - -// Data Processing - FP and SIMD - Floating point data processing -INST(FMOV_float, "FMOV (register)", "00011110yy100000010000nnnnnddddd") -INST(FABS_float, "FABS (scalar)", "00011110yy100000110000nnnnnddddd") -INST(FNEG_float, "FNEG (scalar)", "00011110yy100001010000nnnnnddddd") -INST(FSQRT_float, "FSQRT (scalar)", "00011110yy100001110000nnnnnddddd") -INST(FCVT_float, "FCVT", "00011110yy10001oo10000nnnnnddddd") -INST(FRINTN_float, "FRINTN (scalar)", "00011110yy100100010000nnnnnddddd") -INST(FRINTP_float, "FRINTP (scalar)", "00011110yy100100110000nnnnnddddd") -INST(FRINTM_float, "FRINTM (scalar)", "00011110yy100101010000nnnnnddddd") -INST(FRINTZ_float, "FRINTZ (scalar)", "00011110yy100101110000nnnnnddddd") -INST(FRINTA_float, "FRINTA (scalar)", "00011110yy100110010000nnnnnddddd") -INST(FRINTX_float, "FRINTX (scalar)", "00011110yy100111010000nnnnnddddd") -INST(FRINTI_float, "FRINTI (scalar)", "00011110yy100111110000nnnnnddddd") -//INST(FRINT32X_float, "FRINT32X (scalar)", "00011110yy101000110000nnnnnddddd") // ARMv8.5 -//INST(FRINT64X_float, "FRINT64X (scalar)", "00011110yy101001110000nnnnnddddd") // ARMv8.5 -//INST(FRINT32Z_float, "FRINT32Z (scalar)", "00011110yy101000010000nnnnnddddd") // ARMv8.5 -//INST(FRINT64Z_float, "FRINT64Z (scalar)", "00011110yy101001010000nnnnnddddd") // ARMv8.5 - -// Data Processing - FP and SIMD - Floating point compare -INST(FCMP_float, "FCMP", "00011110yy1mmmmm001000nnnnn0o000") -INST(FCMPE_float, "FCMPE", "00011110yy1mmmmm001000nnnnn1o000") - -// Data Processing - FP and SIMD - Floating point immediate -INST(FMOV_float_imm, "FMOV (scalar, immediate)", "00011110yy1iiiiiiii10000000ddddd") - -// Data Processing - FP and SIMD - Floating point conditional compare -INST(FCCMP_float, "FCCMP", "00011110yy1mmmmmcccc01nnnnn0ffff") -INST(FCCMPE_float, "FCCMPE", "00011110yy1mmmmmcccc01nnnnn1ffff") - -// Data Processing - FP and SIMD - Floating point data processing two register -INST(FMUL_float, "FMUL (scalar)", "00011110yy1mmmmm000010nnnnnddddd") -INST(FDIV_float, "FDIV (scalar)", "00011110yy1mmmmm000110nnnnnddddd") -INST(FADD_float, "FADD (scalar)", "00011110yy1mmmmm001010nnnnnddddd") -INST(FSUB_float, "FSUB (scalar)", "00011110yy1mmmmm001110nnnnnddddd") -INST(FMAX_float, "FMAX (scalar)", "00011110yy1mmmmm010010nnnnnddddd") -INST(FMIN_float, "FMIN (scalar)", "00011110yy1mmmmm010110nnnnnddddd") -INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd") -INST(FMINNM_float, "FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd") -INST(FNMUL_float, "FNMUL (scalar)", "00011110yy1mmmmm100010nnnnnddddd") - -// Data Processing - FP and SIMD - Floating point conditional select -INST(FCSEL_float, "FCSEL", "00011110yy1mmmmmcccc11nnnnnddddd") - -// Data Processing - FP and SIMD - Floating point data processing three register -INST(FMADD_float, "FMADD", "00011111yy0mmmmm0aaaaannnnnddddd") -INST(FMSUB_float, "FMSUB", "00011111yy0mmmmm1aaaaannnnnddddd") -INST(FNMADD_float, "FNMADD", "00011111yy1mmmmm0aaaaannnnnddddd") -INST(FNMSUB_float, "FNMSUB", "00011111yy1mmmmm1aaaaannnnnddddd") - -// BFloat16 -//INST(BFCVT, "BFCVT", "0001111001100011010000nnnnnddddd") // v8.6 -//INST(BFCVTN, "BFCVTN{2}", "0Q00111010100001011010nnnnnddddd") // v8.6 -//INST(BFDOT_element, "BFDOT (by element)", "0Q00111101LMmmmm1111H0nnnnnddddd") // v8.6 -//INST(BFDOT_vec, "BFDOT (vector)", "0Q101110010mmmmm111111nnnnnddddd") // v8.6 -//INST(BFMLALX_element, "BFMLALX (by element)", "0Q00111111LMmmmm1111H0nnnnnddddd") // v8.6 -//INST(BFMLALX_vector, "BFMLALX (vector)", "0Q101110110mmmmm111111nnnnnddddd") // v8.6 -//INST(BFMMLA, "BFMMLA", "01101110010mmmmm111011nnnnnddddd") // v8.6 +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// DO NOT REORDER + +INST(FMOV_3, "FMOV (vector, immediate)", "0Q00111100000abc111111defghddddd") +INST(FMOV_2, "FMOV (vector, immediate)", "0Qo0111100000abc111101defghddddd") +INST(MOVI, "MOVI, MVNI, ORR, BIC (vector, immediate)", "0Qo0111100000abcmmmm01defghddddd") +INST(UnallocatedEncoding, "Unallocated SIMD modified immediate", "0--0111100000-------11----------") +INST(NOP, "NOP", "11010101000000110010000000011111") +INST(YIELD, "YIELD", "11010101000000110010000000111111") +INST(WFE, "WFE", "11010101000000110010000001011111") +INST(WFI, "WFI", "11010101000000110010000001111111") +INST(SEV, "SEV", "11010101000000110010000010011111") +INST(SEVL, "SEVL", "11010101000000110010000010111111") +INST(CFINV, "CFINV", "11010101000000000100000000011111") +INST(XAFlag, "XAFlag", "11010101000000000100000000111111") +INST(AXFlag, "AXFlag", "11010101000000000100000001011111") +INST(IC_IALLU, "IC IALLU", "11010101000010000111010100011111") +INST(IC_IALLUIS, "IC IALLUIS", "11010101000010000111000100011111") +INST(CLREX, "CLREX", "11010101000000110011MMMM01011111") +INST(DSB, "DSB", "11010101000000110011MMMM10011111") +INST(DMB, "DMB", "11010101000000110011MMMM10111111") +INST(ISB, "ISB", "11010101000000110011MMMM11011111") +INST(DC_IVAC, "DC IVAC", "110101010000100001110110001ttttt") +INST(DC_ISW, "DC ISW", "110101010000100001110110010ttttt") +INST(DC_CSW, "DC CSW", "110101010000100001111010010ttttt") +INST(DC_CISW, "DC CISW", "110101010000100001111110010ttttt") +INST(DC_ZVA, "DC ZVA", "110101010000101101110100001ttttt") +INST(DC_CVAC, "DC CVAC", "110101010000101101111010001ttttt") +INST(DC_CVAU, "DC CVAU", "110101010000101101111011001ttttt") +INST(DC_CVAP, "DC CVAP", "110101010000101101111100001ttttt") +INST(DC_CIVAC, "DC CIVAC", "110101010000101101111110001ttttt") +INST(IC_IVAU, "IC IVAU", "110101010000101101110101001ttttt") +INST(BLR, "BLR", "1101011000111111000000nnnnn00000") +INST(BR, "BR", "1101011000011111000000nnnnn00000") +INST(RET, "RET", "1101011001011111000000nnnnn00000") +INST(HINT, "HINT", "11010101000000110010MMMMooo11111") +INST(SXTB_1, "SXTB (32-bit)", "0001001100000000000111nnnnnddddd") +INST(SXTB_2, "SXTB (64-bit)", "1001001101000000000111nnnnnddddd") +INST(SXTH_1, "SXTH (32-bit)", "0001001100000000001111nnnnnddddd") +INST(SXTH_2, "SXTH (64-bit)", "1001001101000000001111nnnnnddddd") +INST(SXTW, "SXTW", "1001001101000000011111nnnnnddddd") +INST(REV32_int, "REV32", "1101101011000000000010nnnnnddddd") +INST(AESE, "AESE", "0100111000101000010010nnnnnddddd") +INST(AESD, "AESD", "0100111000101000010110nnnnnddddd") +INST(AESMC, "AESMC", "0100111000101000011010nnnnnddddd") +INST(AESIMC, "AESIMC", "0100111000101000011110nnnnnddddd") +INST(SHA1H, "SHA1H", "0101111000101000000010nnnnnddddd") +INST(SHA1SU1, "SHA1SU1", "0101111000101000000110nnnnnddddd") +INST(SHA256SU0, "SHA256SU0", "0101111000101000001010nnnnnddddd") +INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd") +INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") +INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") +INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") +INST(SHA512SU0, "SHA512SU0", "1100111011000000100000nnnnnddddd") +INST(SM4E, "SM4E", "1100111011000000100001nnnnnddddd") +INST(RBIT_int, "RBIT", "z101101011000000000000nnnnnddddd") +INST(REV16_int, "REV16", "z101101011000000000001nnnnnddddd") +INST(CLZ_int, "CLZ", "z101101011000000000100nnnnnddddd") +INST(CLS_int, "CLS", "z101101011000000000101nnnnnddddd") +INST(FCVTNS_2, "FCVTNS (vector)", "010111100z100001101010nnnnnddddd") +INST(FCVTMS_2, "FCVTMS (vector)", "010111100z100001101110nnnnnddddd") +INST(FCVTAS_2, "FCVTAS (vector)", "010111100z100001110010nnnnnddddd") +INST(SCVTF_int_2, "SCVTF (vector, integer)", "010111100z100001110110nnnnnddddd") +INST(FCMGT_zero_2, "FCMGT (zero)", "010111101z100000110010nnnnnddddd") +INST(FCMEQ_zero_2, "FCMEQ (zero)", "010111101z100000110110nnnnnddddd") +INST(FCMLT_2, "FCMLT (zero)", "010111101z100000111010nnnnnddddd") +INST(FCVTPS_2, "FCVTPS (vector)", "010111101z100001101010nnnnnddddd") +INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd") +INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") +INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") +INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") +INST(FCVTMU_2, "FCVTMU (vector)", "011111100z100001101110nnnnnddddd") +INST(FCVTAU_2, "FCVTAU (vector)", "011111100z100001110010nnnnnddddd") +INST(UCVTF_int_2, "UCVTF (vector, integer)", "011111100z100001110110nnnnnddddd") +INST(FCMGE_zero_2, "FCMGE (zero)", "011111101z100000110010nnnnnddddd") +INST(FCMLE_2, "FCMLE (zero)", "011111101z100000110110nnnnnddddd") +INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd") +INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd") +INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd") +INST(FCVTXN_1, "FCVTXN, FCVTXN2", "011111100z100001011010nnnnnddddd") +INST(FMAXNMP_pair_2, "FMAXNMP (scalar)", "011111100z110000110010nnnnnddddd") +INST(FADDP_pair_2, "FADDP (scalar)", "011111100z110000110110nnnnnddddd") +INST(FMAXP_pair_2, "FMAXP (scalar)", "011111100z110000111110nnnnnddddd") +INST(FMINNMP_pair_2, "FMINNMP (scalar)", "011111101z110000110010nnnnnddddd") +INST(FMINP_pair_2, "FMINP (scalar)", "011111101z110000111110nnnnnddddd") +INST(FRINTN_1, "FRINTN (vector)", "0Q00111001111001100010nnnnnddddd") +INST(FRINTM_1, "FRINTM (vector)", "0Q00111001111001100110nnnnnddddd") +INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd") +INST(FABS_1, "FABS (vector)", "0Q00111011111000111110nnnnnddddd") +INST(FRINTP_1, "FRINTP (vector)", "0Q00111011111001100010nnnnnddddd") +INST(FRINTZ_1, "FRINTZ (vector)", "0Q00111011111001100110nnnnnddddd") +INST(FRECPE_3, "FRECPE", "0Q00111011111001110110nnnnnddddd") +INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd") +INST(FRINTX_1, "FRINTX (vector)", "0Q10111001111001100110nnnnnddddd") +INST(NOT, "NOT", "0Q10111000100000010110nnnnnddddd") +INST(RBIT_asimd, "RBIT (vector)", "0Q10111001100000010110nnnnnddddd") +INST(FNEG_1, "FNEG (vector)", "0Q10111011111000111110nnnnnddddd") +INST(FRINTI_1, "FRINTI (vector)", "0Q10111011111001100110nnnnnddddd") +INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") +INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt") +INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt") +INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt") +INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt") +INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt") +INST(LDAR, "LDARB, LDARH, LDAR", "zz00100011011111111111nnnnnttttt") +INST(REV, "REV", "z10110101100000000001onnnnnddddd") +INST(SUQADD_1, "SUQADD", "01011110zz100000001110nnnnnddddd") +INST(SQABS_1, "SQABS", "01011110zz100000011110nnnnnddddd") +INST(CMGT_zero_1, "CMGT (zero)", "01011110zz100000100010nnnnnddddd") +INST(CMEQ_zero_1, "CMEQ (zero)", "01011110zz100000100110nnnnnddddd") +INST(CMLT_1, "CMLT (zero)", "01011110zz100000101010nnnnnddddd") +INST(ABS_1, "ABS", "01011110zz100000101110nnnnnddddd") +INST(SQXTN_1, "SQXTN, SQXTN2", "01011110zz100001010010nnnnnddddd") +INST(USQADD_1, "USQADD", "01111110zz100000001110nnnnnddddd") +INST(SQNEG_1, "SQNEG", "01111110zz100000011110nnnnnddddd") +INST(CMGE_zero_1, "CMGE (zero)", "01111110zz100000100010nnnnnddddd") +INST(CMLE_1, "CMLE (zero)", "01111110zz100000100110nnnnnddddd") +INST(NEG_1, "NEG (vector)", "01111110zz100000101110nnnnnddddd") +INST(SQXTUN_1, "SQXTUN, SQXTUN2", "01111110zz100001001010nnnnnddddd") +INST(UQXTN_1, "UQXTN, UQXTN2", "01111110zz100001010010nnnnnddddd") +INST(ADDP_pair, "ADDP (scalar)", "01011110zz110001101110nnnnnddddd") +INST(FCVTN, "FCVTN, FCVTN2", "0Q0011100z100001011010nnnnnddddd") +INST(FCVTL, "FCVTL, FCVTL2", "0Q0011100z100001011110nnnnnddddd") +INST(FRINTN_2, "FRINTN (vector)", "0Q0011100z100001100010nnnnnddddd") +INST(FRINTM_2, "FRINTM (vector)", "0Q0011100z100001100110nnnnnddddd") +INST(FCVTNS_4, "FCVTNS (vector)", "0Q0011100z100001101010nnnnnddddd") +INST(FCVTMS_4, "FCVTMS (vector)", "0Q0011100z100001101110nnnnnddddd") +INST(FCVTAS_4, "FCVTAS (vector)", "0Q0011100z100001110010nnnnnddddd") +INST(SCVTF_int_4, "SCVTF (vector, integer)", "0Q0011100z100001110110nnnnnddddd") +INST(FCMGT_zero_4, "FCMGT (zero)", "0Q0011101z100000110010nnnnnddddd") +INST(FCMEQ_zero_4, "FCMEQ (zero)", "0Q0011101z100000110110nnnnnddddd") +INST(FCMLT_4, "FCMLT (zero)", "0Q0011101z100000111010nnnnnddddd") +INST(FABS_2, "FABS (vector)", "0Q0011101z100000111110nnnnnddddd") +INST(FRINTP_2, "FRINTP (vector)", "0Q0011101z100001100010nnnnnddddd") +INST(FRINTZ_2, "FRINTZ (vector)", "0Q0011101z100001100110nnnnnddddd") +INST(FCVTPS_4, "FCVTPS (vector)", "0Q0011101z100001101010nnnnnddddd") +INST(FCVTZS_int_4, "FCVTZS (vector, integer)", "0Q0011101z100001101110nnnnnddddd") +INST(URECPE, "URECPE", "0Q0011101z100001110010nnnnnddddd") +INST(FRECPE_4, "FRECPE", "0Q0011101z100001110110nnnnnddddd") +INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd") +INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd") +INST(FRINTX_2, "FRINTX (vector)", "0Q1011100z100001100110nnnnnddddd") +INST(FCVTNU_4, "FCVTNU (vector)", "0Q1011100z100001101010nnnnnddddd") +INST(FCVTMU_4, "FCVTMU (vector)", "0Q1011100z100001101110nnnnnddddd") +INST(FCVTAU_4, "FCVTAU (vector)", "0Q1011100z100001110010nnnnnddddd") +INST(UCVTF_int_4, "UCVTF (vector, integer)", "0Q1011100z100001110110nnnnnddddd") +INST(FNEG_2, "FNEG (vector)", "0Q1011101z100000111110nnnnnddddd") +INST(FRINTI_2, "FRINTI (vector)", "0Q1011101z100001100110nnnnnddddd") +INST(FCMGE_zero_4, "FCMGE (zero)", "0Q1011101z100000110010nnnnnddddd") +INST(FCMLE_4, "FCMLE (zero)", "0Q1011101z100000110110nnnnnddddd") +INST(FCVTPU_4, "FCVTPU (vector)", "0Q1011101z100001101010nnnnnddddd") +INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") +INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") +INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") +INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") +INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") +INST(FMAXV_2, "FMAXV", "0Q1011100z110000111110nnnnnddddd") +INST(FMINNMV_2, "FMINNMV", "0Q1011101z110000110010nnnnnddddd") +INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") +INST(FMOV_float, "FMOV (register)", "00011110yy100000010000nnnnnddddd") +INST(FABS_float, "FABS (scalar)", "00011110yy100000110000nnnnnddddd") +INST(FNEG_float, "FNEG (scalar)", "00011110yy100001010000nnnnnddddd") +INST(FSQRT_float, "FSQRT (scalar)", "00011110yy100001110000nnnnnddddd") +INST(FRINTN_float, "FRINTN (scalar)", "00011110yy100100010000nnnnnddddd") +INST(FRINTP_float, "FRINTP (scalar)", "00011110yy100100110000nnnnnddddd") +INST(FRINTM_float, "FRINTM (scalar)", "00011110yy100101010000nnnnnddddd") +INST(FRINTZ_float, "FRINTZ (scalar)", "00011110yy100101110000nnnnnddddd") +INST(FRINTA_float, "FRINTA (scalar)", "00011110yy100110010000nnnnnddddd") +INST(FRINTX_float, "FRINTX (scalar)", "00011110yy100111010000nnnnnddddd") +INST(FRINTI_float, "FRINTI (scalar)", "00011110yy100111110000nnnnnddddd") +INST(LD1R_1, "LD1R", "0Q001101010000001100zznnnnnttttt") +INST(LD3R_1, "LD3R", "0Q001101010000001110zznnnnnttttt") +INST(LD2R_1, "LD2R", "0Q001101011000001100zznnnnnttttt") +INST(LD4R_1, "LD4R", "0Q001101011000001110zznnnnnttttt") +INST(REV64_asimd, "REV64", "0Q001110zz100000000010nnnnnddddd") +INST(REV16_asimd, "REV16 (vector)", "0Q001110zz100000000110nnnnnddddd") +INST(SADDLP, "SADDLP", "0Q001110zz100000001010nnnnnddddd") +INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd") +INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd") +INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd") +INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd") +INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd") +INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd") +INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd") +INST(CMLT_2, "CMLT (zero)", "0Q001110zz100000101010nnnnnddddd") +INST(ABS_2, "ABS", "0Q001110zz100000101110nnnnnddddd") +INST(XTN, "XTN, XTN2", "0Q001110zz100001001010nnnnnddddd") +INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd") +INST(REV32_asimd, "REV32 (vector)", "0Q101110zz100000000010nnnnnddddd") +INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd") +INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd") +INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd") +INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd") +INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd") +INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd") +INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd") +INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd") +INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd") +INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd") +INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd") +INST(SADDLV, "SADDLV", "0Q001110zz110000001110nnnnnddddd") +INST(SMAXV, "SMAXV", "0Q001110zz110000101010nnnnnddddd") +INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") +INST(ADDV, "ADDV", "0Q001110zz110001101110nnnnnddddd") +INST(UADDLV, "UADDLV", "0Q101110zz110000001110nnnnnddddd") +INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") +INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") +INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") +INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") +INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") +INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") +INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") +INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") +INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") +INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") +INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") +INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") +INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") +INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") +INST(FCMP_float, "FCMP", "00011110yy1mmmmm001000nnnnn0o000") +INST(FCMPE_float, "FCMPE", "00011110yy1mmmmm001000nnnnn1o000") +INST(FCVT_float, "FCVT", "00011110yy10001oo10000nnnnnddddd") +INST(ASR_1, "ASR (immediate, 32-bit)", "00010011000rrrrr011111nnnnnddddd") +INST(RMIF, "RMIF", "10111010000iiiiii00001nnnnn0IIII") +INST(SMULH, "SMULH", "10011011010mmmmm011111nnnnnddddd") +INST(UMULH, "UMULH", "10011011110mmmmm011111nnnnnddddd") +INST(SHA1C, "SHA1C", "01011110000mmmmm000000nnnnnddddd") +INST(SHA1P, "SHA1P", "01011110000mmmmm000100nnnnnddddd") +INST(SHA1M, "SHA1M", "01011110000mmmmm001000nnnnnddddd") +INST(SHA1SU0, "SHA1SU0", "01011110000mmmmm001100nnnnnddddd") +INST(SHA256H, "SHA256H", "01011110000mmmmm010000nnnnnddddd") +INST(SHA256H2, "SHA256H2", "01011110000mmmmm010100nnnnnddddd") +INST(SHA256SU1, "SHA256SU1", "01011110000mmmmm011000nnnnnddddd") +INST(DUP_elt_1, "DUP (element)", "01011110000iiiii000001nnnnnddddd") +INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd") +INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd") +INST(FRSQRTS_1, "FRSQRTS", "01011110110mmmmm001111nnnnnddddd") +INST(INS_gen, "INS (general)", "01001110000iiiii000111nnnnnddddd") +INST(SHA512H, "SHA512H", "11001110011mmmmm100000nnnnnddddd") +INST(SHA512H2, "SHA512H2", "11001110011mmmmm100001nnnnnddddd") +INST(SHA512SU1, "SHA512SU1", "11001110011mmmmm100010nnnnnddddd") +INST(RAX1, "RAX1", "11001110011mmmmm100011nnnnnddddd") +INST(SM3PARTW1, "SM3PARTW1", "11001110011mmmmm110000nnnnnddddd") +INST(SM3PARTW2, "SM3PARTW2", "11001110011mmmmm110001nnnnnddddd") +INST(SM4EKEY, "SM4EKEY", "11001110011mmmmm110010nnnnnddddd") +INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") +INST(FMOV_float_imm, "FMOV (scalar, immediate)", "00011110yy1iiiiiiii10000000ddddd") +INST(ASR_2, "ASR (immediate, 64-bit)", "1001001101rrrrrr111111nnnnnddddd") +INST(SVC, "SVC", "11010100000iiiiiiiiiiiiiiii00001") +INST(BRK, "BRK", "11010100001iiiiiiiiiiiiiiii00000") +INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000oo0Szznnnnnttttt") +INST(ST3_sngl_1, "ST3 (single structure)", "0Q00110100000000oo1Szznnnnnttttt") +INST(ST2_sngl_1, "ST2 (single structure)", "0Q00110100100000oo0Szznnnnnttttt") +INST(ST4_sngl_1, "ST4 (single structure)", "0Q00110100100000oo1Szznnnnnttttt") +INST(LD1_sngl_1, "LD1 (single structure)", "0Q00110101000000oo0Szznnnnnttttt") +INST(LD3_sngl_1, "LD3 (single structure)", "0Q00110101000000oo1Szznnnnnttttt") +INST(LD2_sngl_1, "LD2 (single structure)", "0Q00110101100000oo0Szznnnnnttttt") +INST(LD4_sngl_1, "LD4 (single structure)", "0Q00110101100000oo1Szznnnnnttttt") +INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt") +INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt") +INST(UDIV, "UDIV", "z0011010110mmmmm000010nnnnnddddd") +INST(SDIV, "SDIV", "z0011010110mmmmm000011nnnnnddddd") +INST(LSLV, "LSLV", "z0011010110mmmmm001000nnnnnddddd") +INST(LSRV, "LSRV", "z0011010110mmmmm001001nnnnnddddd") +INST(ASRV, "ASRV", "z0011010110mmmmm001010nnnnnddddd") +INST(RORV, "RORV", "z0011010110mmmmm001011nnnnnddddd") +INST(ADC, "ADC", "z0011010000mmmmm000000nnnnnddddd") +INST(ADCS, "ADCS", "z0111010000mmmmm000000nnnnnddddd") +INST(SBC, "SBC", "z1011010000mmmmm000000nnnnnddddd") +INST(SBCS, "SBCS", "z1111010000mmmmm000000nnnnnddddd") +INST(FMULX_vec_2, "FMULX", "010111100z1mmmmm110111nnnnnddddd") +INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd") +INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd") +INST(FRSQRTS_2, "FRSQRTS", "010111101z1mmmmm111111nnnnnddddd") +INST(FCMGE_reg_2, "FCMGE (register)", "011111100z1mmmmm111001nnnnnddddd") +INST(FACGE_2, "FACGE", "011111100z1mmmmm111011nnnnnddddd") +INST(FABD_2, "FABD", "011111101z1mmmmm110101nnnnnddddd") +INST(FCMGT_reg_2, "FCMGT (register)", "011111101z1mmmmm111001nnnnnddddd") +INST(FACGT_2, "FACGT", "011111101z1mmmmm111011nnnnnddddd") +INST(DUP_elt_2, "DUP (element)", "0Q001110000iiiii000001nnnnnddddd") +INST(DUP_gen, "DUP (general)", "0Q001110000iiiii000011nnnnnddddd") +INST(SMOV, "SMOV", "0Q001110000iiiii001011nnnnnddddd") +INST(UMOV, "UMOV", "0Q001110000iiiii001111nnnnnddddd") +INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd") +INST(FRECPS_3, "FRECPS", "0Q001110010mmmmm001111nnnnnddddd") +INST(FRSQRTS_3, "FRSQRTS", "0Q001110110mmmmm001111nnnnnddddd") +INST(FMLA_vec_1, "FMLA (vector)", "0Q001110010mmmmm000011nnnnnddddd") +INST(FMLS_vec_1, "FMLS (vector)", "0Q001110110mmmmm000011nnnnnddddd") +INST(AND_asimd, "AND (vector)", "0Q001110001mmmmm000111nnnnnddddd") +INST(BIC_asimd_reg, "BIC (vector, register)", "0Q001110011mmmmm000111nnnnnddddd") +INST(ORR_asimd_reg, "ORR (vector, register)", "0Q001110101mmmmm000111nnnnnddddd") +INST(ORN_asimd, "ORN (vector)", "0Q001110111mmmmm000111nnnnnddddd") +INST(EOR_asimd, "EOR (vector)", "0Q101110001mmmmm000111nnnnnddddd") +INST(BSL, "BSL", "0Q101110011mmmmm000111nnnnnddddd") +INST(BIT, "BIT", "0Q101110101mmmmm000111nnnnnddddd") +INST(BIF, "BIF", "0Q101110111mmmmm000111nnnnnddddd") +INST(STx_mult_1, "STx (multiple structures)", "0Q00110000000000oooozznnnnnttttt") +INST(LDx_mult_1, "LDx (multiple structures)", "0Q00110001000000oooozznnnnnttttt") +INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt") +INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt") +INST(SQADD_1, "SQADD", "01011110zz1mmmmm000011nnnnnddddd") +INST(SQSUB_1, "SQSUB", "01011110zz1mmmmm001011nnnnnddddd") +INST(CMGT_reg_1, "CMGT (register)", "01011110zz1mmmmm001101nnnnnddddd") +INST(CMGE_reg_1, "CMGE (register)", "01011110zz1mmmmm001111nnnnnddddd") +INST(SSHL_1, "SSHL", "01011110zz1mmmmm010001nnnnnddddd") +INST(SQSHL_reg_1, "SQSHL (register)", "01011110zz1mmmmm010011nnnnnddddd") +INST(SRSHL_1, "SRSHL", "01011110zz1mmmmm010101nnnnnddddd") +INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd") +INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd") +INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd") +INST(UQADD_1, "UQADD", "01111110zz1mmmmm000011nnnnnddddd") +INST(UQSUB_1, "UQSUB", "01111110zz1mmmmm001011nnnnnddddd") +INST(CMHI_1, "CMHI (register)", "01111110zz1mmmmm001101nnnnnddddd") +INST(CMHS_1, "CMHS (register)", "01111110zz1mmmmm001111nnnnnddddd") +INST(USHL_1, "USHL", "01111110zz1mmmmm010001nnnnnddddd") +INST(UQSHL_reg_1, "UQSHL (register)", "01111110zz1mmmmm010011nnnnnddddd") +INST(URSHL_1, "URSHL", "01111110zz1mmmmm010101nnnnnddddd") +INST(SUB_1, "SUB (vector)", "01111110zz1mmmmm100001nnnnnddddd") +INST(CMEQ_reg_1, "CMEQ (register)", "01111110zz1mmmmm100011nnnnnddddd") +INST(SQRDMULH_vec_1, "SQRDMULH (vector)", "01111110zz1mmmmm101101nnnnnddddd") +INST(SSHR_1, "SSHR", "010111110IIIIiii000001nnnnnddddd") +INST(SSRA_1, "SSRA", "010111110IIIIiii000101nnnnnddddd") +INST(SRSHR_1, "SRSHR", "010111110IIIIiii001001nnnnnddddd") +INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd") +INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd") +INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd") +INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd") +INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd") +INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd") +INST(USHR_1, "USHR", "011111110IIIIiii000001nnnnnddddd") +INST(USRA_1, "USRA", "011111110IIIIiii000101nnnnnddddd") +INST(URSHR_1, "URSHR", "011111110IIIIiii001001nnnnnddddd") +INST(URSRA_1, "URSRA", "011111110IIIIiii001101nnnnnddddd") +INST(SRI_1, "SRI", "011111110IIIIiii010001nnnnnddddd") +INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd") +INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd") +INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd") +INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd") +INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd") +INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd") +INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd") +INST(FMLA_elt_1, "FMLA (by element)", "0101111100LMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_1, "FMLS (by element)", "0101111100LMmmmm0101H0nnnnnddddd") +INST(FMAXNM_2, "FMAXNM (vector)", "0Q0011100z1mmmmm110001nnnnnddddd") +INST(FMLA_vec_2, "FMLA (vector)", "0Q0011100z1mmmmm110011nnnnnddddd") +INST(FADD_2, "FADD (vector)", "0Q0011100z1mmmmm110101nnnnnddddd") +INST(FMAX_2, "FMAX (vector)", "0Q0011100z1mmmmm111101nnnnnddddd") +INST(FMULX_vec_4, "FMULX", "0Q0011100z1mmmmm110111nnnnnddddd") +INST(FCMEQ_reg_4, "FCMEQ (register)", "0Q0011100z1mmmmm111001nnnnnddddd") +INST(FRECPS_4, "FRECPS", "0Q0011100z1mmmmm111111nnnnnddddd") +INST(FMINNM_2, "FMINNM (vector)", "0Q0011101z1mmmmm110001nnnnnddddd") +INST(FMLS_vec_2, "FMLS (vector)", "0Q0011101z1mmmmm110011nnnnnddddd") +INST(FSUB_2, "FSUB (vector)", "0Q0011101z1mmmmm110101nnnnnddddd") +INST(FMIN_2, "FMIN (vector)", "0Q0011101z1mmmmm111101nnnnnddddd") +INST(FRSQRTS_4, "FRSQRTS", "0Q0011101z1mmmmm111111nnnnnddddd") +INST(FMAXNMP_vec_2, "FMAXNMP (vector)", "0Q1011100z1mmmmm110001nnnnnddddd") +INST(FADDP_vec_2, "FADDP (vector)", "0Q1011100z1mmmmm110101nnnnnddddd") +INST(FMUL_vec_2, "FMUL (vector)", "0Q1011100z1mmmmm110111nnnnnddddd") +INST(FCMGE_reg_4, "FCMGE (register)", "0Q1011100z1mmmmm111001nnnnnddddd") +INST(FACGE_4, "FACGE", "0Q1011100z1mmmmm111011nnnnnddddd") +INST(FMAXP_vec_2, "FMAXP (vector)", "0Q1011100z1mmmmm111101nnnnnddddd") +INST(FDIV_2, "FDIV (vector)", "0Q1011100z1mmmmm111111nnnnnddddd") +INST(FMINNMP_vec_2, "FMINNMP (vector)", "0Q1011101z1mmmmm110001nnnnnddddd") +INST(FABD_4, "FABD", "0Q1011101z1mmmmm110101nnnnnddddd") +INST(FCMGT_reg_4, "FCMGT (register)", "0Q1011101z1mmmmm111001nnnnnddddd") +INST(FACGT_4, "FACGT", "0Q1011101z1mmmmm111011nnnnnddddd") +INST(FMINP_vec_2, "FMINP (vector)", "0Q1011101z1mmmmm111101nnnnnddddd") +INST(SM3TT1A, "SM3TT1A", "11001110010mmmmm10ii00nnnnnddddd") +INST(SM3TT1B, "SM3TT1B", "11001110010mmmmm10ii01nnnnnddddd") +INST(SM3TT2A, "SM3TT2A", "11001110010mmmmm10ii10nnnnnddddd") +INST(SM3TT2B, "SM3TT2B", "11001110010mmmmm10ii11nnnnnddddd") +INST(FMUL_float, "FMUL (scalar)", "00011110yy1mmmmm000010nnnnnddddd") +INST(FDIV_float, "FDIV (scalar)", "00011110yy1mmmmm000110nnnnnddddd") +INST(FADD_float, "FADD (scalar)", "00011110yy1mmmmm001010nnnnnddddd") +INST(FSUB_float, "FSUB (scalar)", "00011110yy1mmmmm001110nnnnnddddd") +INST(FMAX_float, "FMAX (scalar)", "00011110yy1mmmmm010010nnnnnddddd") +INST(FMIN_float, "FMIN (scalar)", "00011110yy1mmmmm010110nnnnnddddd") +INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd") +INST(FMINNM_float, "FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd") +INST(FNMUL_float, "FNMUL (scalar)", "00011110yy1mmmmm100010nnnnnddddd") +INST(LD1R_2, "LD1R", "0Q001101110mmmmm1100zznnnnnttttt") +INST(LD3R_2, "LD3R", "0Q001101110mmmmm1110zznnnnnttttt") +INST(LD2R_2, "LD2R", "0Q001101111mmmmm1100zznnnnnttttt") +INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt") +INST(CRC32, "CRC32B, CRC32H, CRC32W, CRC32X", "z0011010110mmmmm0100zznnnnnddddd") +INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd") +INST(FMLA_elt_2, "FMLA (by element)", "010111111zLMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_2, "FMLS (by element)", "010111111zLMmmmm0101H0nnnnnddddd") +INST(FMUL_elt_2, "FMUL (by element)", "010111111zLMmmmm1001H0nnnnnddddd") +INST(FMULX_elt_2, "FMULX (by element)", "011111111zLMmmmm1001H0nnnnnddddd") +INST(TBL, "TBL", "0Q001110000mmmmm0LL000nnnnnddddd") +INST(TBX, "TBX", "0Q001110000mmmmm0LL100nnnnnddddd") +INST(UZP1, "UZP1", "0Q001110zz0mmmmm000110nnnnnddddd") +INST(TRN1, "TRN1", "0Q001110zz0mmmmm001010nnnnnddddd") +INST(ZIP1, "ZIP1", "0Q001110zz0mmmmm001110nnnnnddddd") +INST(UZP2, "UZP2", "0Q001110zz0mmmmm010110nnnnnddddd") +INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd") +INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd") +INST(SDOT_vec, "SDOT (vector)", "0Q001110zz0mmmmm100101nnnnnddddd") +INST(UDOT_vec, "UDOT (vector)", "0Q101110zz0mmmmm100101nnnnnddddd") +INST(SADDL, "SADDL, SADDL2", "0Q001110zz1mmmmm000000nnnnnddddd") +INST(SADDW, "SADDW, SADDW2", "0Q001110zz1mmmmm000100nnnnnddddd") +INST(SSUBL, "SSUBL, SSUBL2", "0Q001110zz1mmmmm001000nnnnnddddd") +INST(SSUBW, "SSUBW, SSUBW2", "0Q001110zz1mmmmm001100nnnnnddddd") +INST(ADDHN, "ADDHN, ADDHN2", "0Q001110zz1mmmmm010000nnnnnddddd") +INST(SABAL, "SABAL, SABAL2", "0Q001110zz1mmmmm010100nnnnnddddd") +INST(SUBHN, "SUBHN, SUBHN2", "0Q001110zz1mmmmm011000nnnnnddddd") +INST(SABDL, "SABDL, SABDL2", "0Q001110zz1mmmmm011100nnnnnddddd") +INST(SMLAL_vec, "SMLAL, SMLAL2 (vector)", "0Q001110zz1mmmmm100000nnnnnddddd") +INST(SMLSL_vec, "SMLSL, SMLSL2 (vector)", "0Q001110zz1mmmmm101000nnnnnddddd") +INST(SMULL_vec, "SMULL, SMULL2 (vector)", "0Q001110zz1mmmmm110000nnnnnddddd") +INST(PMULL, "PMULL, PMULL2", "0Q001110zz1mmmmm111000nnnnnddddd") +INST(UADDL, "UADDL, UADDL2", "0Q101110zz1mmmmm000000nnnnnddddd") +INST(UADDW, "UADDW, UADDW2", "0Q101110zz1mmmmm000100nnnnnddddd") +INST(USUBL, "USUBL, USUBL2", "0Q101110zz1mmmmm001000nnnnnddddd") +INST(USUBW, "USUBW, USUBW2", "0Q101110zz1mmmmm001100nnnnnddddd") +INST(RADDHN, "RADDHN, RADDHN2", "0Q101110zz1mmmmm010000nnnnnddddd") +INST(UABAL, "UABAL, UABAL2", "0Q101110zz1mmmmm010100nnnnnddddd") +INST(RSUBHN, "RSUBHN, RSUBHN2", "0Q101110zz1mmmmm011000nnnnnddddd") +INST(UABDL, "UABDL, UABDL2", "0Q101110zz1mmmmm011100nnnnnddddd") +INST(UMLAL_vec, "UMLAL, UMLAL2 (vector)", "0Q101110zz1mmmmm100000nnnnnddddd") +INST(UMLSL_vec, "UMLSL, UMLSL2 (vector)", "0Q101110zz1mmmmm101000nnnnnddddd") +INST(UMULL_vec, "UMULL, UMULL2 (vector)", "0Q101110zz1mmmmm110000nnnnnddddd") +INST(SQDMULL_vec_2, "SQDMULL, SQDMULL2 (vector)", "0Q001110zz1mmmmm110100nnnnnddddd") +INST(SHADD, "SHADD", "0Q001110zz1mmmmm000001nnnnnddddd") +INST(SQADD_2, "SQADD", "0Q001110zz1mmmmm000011nnnnnddddd") +INST(SRHADD, "SRHADD", "0Q001110zz1mmmmm000101nnnnnddddd") +INST(SHSUB, "SHSUB", "0Q001110zz1mmmmm001001nnnnnddddd") +INST(SQSUB_2, "SQSUB", "0Q001110zz1mmmmm001011nnnnnddddd") +INST(CMGT_reg_2, "CMGT (register)", "0Q001110zz1mmmmm001101nnnnnddddd") +INST(CMGE_reg_2, "CMGE (register)", "0Q001110zz1mmmmm001111nnnnnddddd") +INST(SSHL_2, "SSHL", "0Q001110zz1mmmmm010001nnnnnddddd") +INST(SQSHL_reg_2, "SQSHL (register)", "0Q001110zz1mmmmm010011nnnnnddddd") +INST(SRSHL_2, "SRSHL", "0Q001110zz1mmmmm010101nnnnnddddd") +INST(SMAX, "SMAX", "0Q001110zz1mmmmm011001nnnnnddddd") +INST(SMIN, "SMIN", "0Q001110zz1mmmmm011011nnnnnddddd") +INST(SABD, "SABD", "0Q001110zz1mmmmm011101nnnnnddddd") +INST(SABA, "SABA", "0Q001110zz1mmmmm011111nnnnnddddd") +INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd") +INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd") +INST(MLA_vec, "MLA (vector)", "0Q001110zz1mmmmm100101nnnnnddddd") +INST(MUL_vec, "MUL (vector)", "0Q001110zz1mmmmm100111nnnnnddddd") +INST(SMAXP, "SMAXP", "0Q001110zz1mmmmm101001nnnnnddddd") +INST(SMINP, "SMINP", "0Q001110zz1mmmmm101011nnnnnddddd") +INST(SQDMULH_vec_2, "SQDMULH (vector)", "0Q001110zz1mmmmm101101nnnnnddddd") +INST(ADDP_vec, "ADDP (vector)", "0Q001110zz1mmmmm101111nnnnnddddd") +INST(UHADD, "UHADD", "0Q101110zz1mmmmm000001nnnnnddddd") +INST(UQADD_2, "UQADD", "0Q101110zz1mmmmm000011nnnnnddddd") +INST(URHADD, "URHADD", "0Q101110zz1mmmmm000101nnnnnddddd") +INST(UHSUB, "UHSUB", "0Q101110zz1mmmmm001001nnnnnddddd") +INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd") +INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd") +INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd") +INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd") +INST(UQSHL_reg_2, "UQSHL (register)", "0Q101110zz1mmmmm010011nnnnnddddd") +INST(URSHL_2, "URSHL", "0Q101110zz1mmmmm010101nnnnnddddd") +INST(UMAX, "UMAX", "0Q101110zz1mmmmm011001nnnnnddddd") +INST(UMIN, "UMIN", "0Q101110zz1mmmmm011011nnnnnddddd") +INST(UABD, "UABD", "0Q101110zz1mmmmm011101nnnnnddddd") +INST(UABA, "UABA", "0Q101110zz1mmmmm011111nnnnnddddd") +INST(SUB_2, "SUB (vector)", "0Q101110zz1mmmmm100001nnnnnddddd") +INST(CMEQ_reg_2, "CMEQ (register)", "0Q101110zz1mmmmm100011nnnnnddddd") +INST(MLS_vec, "MLS (vector)", "0Q101110zz1mmmmm100101nnnnnddddd") +INST(PMUL, "PMUL", "0Q101110zz1mmmmm100111nnnnnddddd") +INST(UMAXP, "UMAXP", "0Q101110zz1mmmmm101001nnnnnddddd") +INST(UMINP, "UMINP", "0Q101110zz1mmmmm101011nnnnnddddd") +INST(SQRDMULH_vec_2, "SQRDMULH (vector)", "0Q101110zz1mmmmm101101nnnnnddddd") +INST(SSHR_2, "SSHR", "0Q0011110IIIIiii000001nnnnnddddd") +INST(SSRA_2, "SSRA", "0Q0011110IIIIiii000101nnnnnddddd") +INST(SRSHR_2, "SRSHR", "0Q0011110IIIIiii001001nnnnnddddd") +INST(SRSRA_2, "SRSRA", "0Q0011110IIIIiii001101nnnnnddddd") +INST(SHL_2, "SHL", "0Q0011110IIIIiii010101nnnnnddddd") +INST(SQSHL_imm_2, "SQSHL (immediate)", "0Q0011110IIIIiii011101nnnnnddddd") +INST(SHRN, "SHRN, SHRN2", "0Q0011110IIIIiii100001nnnnnddddd") +INST(RSHRN, "RSHRN, RSHRN2", "0Q0011110IIIIiii100011nnnnnddddd") +INST(SQSHRN_2, "SQSHRN, SQSHRN2", "0Q0011110IIIIiii100101nnnnnddddd") +INST(SQRSHRN_2, "SQRSHRN, SQRSHRN2", "0Q0011110IIIIiii100111nnnnnddddd") +INST(SSHLL, "SSHLL, SSHLL2", "0Q0011110IIIIiii101001nnnnnddddd") +INST(SCVTF_fix_2, "SCVTF (vector, fixed-point)", "0Q0011110IIIIiii111001nnnnnddddd") +INST(FCVTZS_fix_2, "FCVTZS (vector, fixed-point)", "0Q0011110IIIIiii111111nnnnnddddd") +INST(USHR_2, "USHR", "0Q1011110IIIIiii000001nnnnnddddd") +INST(USRA_2, "USRA", "0Q1011110IIIIiii000101nnnnnddddd") +INST(URSHR_2, "URSHR", "0Q1011110IIIIiii001001nnnnnddddd") +INST(URSRA_2, "URSRA", "0Q1011110IIIIiii001101nnnnnddddd") +INST(SRI_2, "SRI", "0Q1011110IIIIiii010001nnnnnddddd") +INST(SLI_2, "SLI", "0Q1011110IIIIiii010101nnnnnddddd") +INST(SQSHLU_2, "SQSHLU", "0Q1011110IIIIiii011001nnnnnddddd") +INST(UQSHL_imm_2, "UQSHL (immediate)", "0Q1011110IIIIiii011101nnnnnddddd") +INST(SQSHRUN_2, "SQSHRUN, SQSHRUN2", "0Q1011110IIIIiii100001nnnnnddddd") +INST(SQRSHRUN_2, "SQRSHRUN, SQRSHRUN2", "0Q1011110IIIIiii100011nnnnnddddd") +INST(UQSHRN_2, "UQSHRN, UQSHRN2", "0Q1011110IIIIiii100101nnnnnddddd") +INST(UQRSHRN_2, "UQRSHRN, UQRSHRN2", "0Q1011110IIIIiii100111nnnnnddddd") +INST(USHLL, "USHLL, USHLL2", "0Q1011110IIIIiii101001nnnnnddddd") +INST(UCVTF_fix_2, "UCVTF (vector, fixed-point)", "0Q1011110IIIIiii111001nnnnnddddd") +INST(FCVTZU_fix_2, "FCVTZU (vector, fixed-point)", "0Q1011110IIIIiii111111nnnnnddddd") +INST(FMLA_elt_3, "FMLA (by element)", "0Q00111100LMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_3, "FMLS (by element)", "0Q00111100LMmmmm0101H0nnnnnddddd") +INST(UnallocatedEncoding, "", "10111000110---------00----------") +INST(PRFM_unscaled_imm, "PRFM (unscaled offset)", "11111000100iiiiiiiii00nnnnnttttt") +INST(STTRB, "STTRB", "00111000000iiiiiiiii10nnnnnttttt") +INST(LDTRB, "LDTRB", "00111000010iiiiiiiii10nnnnnttttt") +INST(STTRH, "STTRH", "01111000000iiiiiiiii10nnnnnttttt") +INST(LDTRH, "LDTRH", "01111000010iiiiiiiii10nnnnnttttt") +INST(LDTRSW, "LDTRSW", "10111000100iiiiiiiii10nnnnnttttt") +INST(CCMN_reg, "CCMN (register)", "z0111010010mmmmmcccc00nnnnn0ffff") +INST(CCMP_reg, "CCMP (register)", "z1111010010mmmmmcccc00nnnnn0ffff") +INST(CCMN_imm, "CCMN (immediate)", "z0111010010iiiiicccc10nnnnn0ffff") +INST(CCMP_imm, "CCMP (immediate)", "z1111010010iiiiicccc10nnnnn0ffff") +INST(SQDMULL_elt_1, "SQDMULL, SQDMULL2 (by element)", "01011111zzLMmmmm1011H0nnnnnddddd") +INST(SQDMULH_elt_1, "SQDMULH (by element)", "01011111zzLMmmmm1100H0nnnnnddddd") +INST(SQRDMULH_elt_1, "SQRDMULH (by element)", "01011111zzLMmmmm1101H0nnnnnddddd") +INST(INS_elt, "INS (element)", "01101110000iiiii0iiii1nnnnnddddd") +INST(FCADD_vec, "FCADD", "0Q101110zz0mmmmm111r01nnnnnddddd") +INST(FMLA_elt_4, "FMLA (by element)", "0Q0011111zLMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_4, "FMLS (by element)", "0Q0011111zLMmmmm0101H0nnnnnddddd") +INST(FMUL_elt_4, "FMUL (by element)", "0Q0011111zLMmmmm1001H0nnnnnddddd") +INST(FMULX_elt_4, "FMULX (by element)", "0Q1011111zLMmmmm1001H0nnnnnddddd") +INST(SCVTF_float_fix, "SCVTF (scalar, fixed-point)", "z0011110yy000010ppppppnnnnnddddd") +INST(UCVTF_float_fix, "UCVTF (scalar, fixed-point)", "z0011110yy000011ppppppnnnnnddddd") +INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011110yy011000ppppppnnnnnddddd") +INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011110yy011001ppppppnnnnnddddd") +INST(MSR_reg, "MSR (register)", "110101010001poooNNNNMMMMooottttt") +INST(MRS, "MRS", "110101010011poooNNNNMMMMooottttt") +INST(UnallocatedEncoding, "", "111110001-0---------00----------") +INST(UnallocatedEncoding, "", "10111000110----------1----------") +INST(CSEL, "CSEL", "z0011010100mmmmmcccc00nnnnnddddd") +INST(CSINC, "CSINC", "z0011010100mmmmmcccc01nnnnnddddd") +INST(CSINV, "CSINV", "z1011010100mmmmmcccc00nnnnnddddd") +INST(CSNEG, "CSNEG", "z1011010100mmmmmcccc01nnnnnddddd") +INST(SMADDL, "SMADDL", "10011011001mmmmm0aaaaannnnnddddd") +INST(SMSUBL, "SMSUBL", "10011011001mmmmm1aaaaannnnnddddd") +INST(UMADDL, "UMADDL", "10011011101mmmmm0aaaaannnnnddddd") +INST(UMSUBL, "UMSUBL", "10011011101mmmmm1aaaaannnnnddddd") +INST(EXT, "EXT", "0Q101110000mmmmm0iiii0nnnnnddddd") +INST(FCMLA_vec, "FCMLA", "0Q101110zz0mmmmm110rr1nnnnnddddd") +INST(SMLAL_elt, "SMLAL, SMLAL2 (by element)", "0Q001111zzLMmmmm0010H0nnnnnddddd") +INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") +INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") +INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd") +INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd") +INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd") +INST(SQRDMULH_elt_2, "SQRDMULH (by element)", "0Q001111zzLMmmmm1101H0nnnnnddddd") +INST(SDOT_elt, "SDOT (by element)", "0Q001111zzLMmmmm1110H0nnnnnddddd") +INST(MLA_elt, "MLA (by element)", "0Q101111zzLMmmmm0000H0nnnnnddddd") +INST(UMLAL_elt, "UMLAL, UMLAL2 (by element)", "0Q101111zzLMmmmm0010H0nnnnnddddd") +INST(MLS_elt, "MLS (by element)", "0Q101111zzLMmmmm0100H0nnnnnddddd") +INST(UMLSL_elt, "UMLSL, UMLSL2 (by element)", "0Q101111zzLMmmmm0110H0nnnnnddddd") +INST(UMULL_elt, "UMULL, UMULL2 (by element)", "0Q101111zzLMmmmm1010H0nnnnnddddd") +INST(UDOT_elt, "UDOT (by element)", "0Q101111zzLMmmmm1110H0nnnnnddddd") +INST(EOR3, "EOR3", "11001110000mmmmm0aaaaannnnnddddd") +INST(BCAX, "BCAX", "11001110001mmmmm0aaaaannnnnddddd") +INST(SM3SS1, "SM3SS1", "11001110010mmmmm0aaaaannnnnddddd") +INST(FCCMP_float, "FCCMP", "00011110yy1mmmmmcccc01nnnnn0ffff") +INST(FCCMPE_float, "FCCMPE", "00011110yy1mmmmmcccc01nnnnn1ffff") +INST(ST1_sngl_2, "ST1 (single structure)", "0Q001101100mmmmmoo0Szznnnnnttttt") +INST(ST3_sngl_2, "ST3 (single structure)", "0Q001101100mmmmmoo1Szznnnnnttttt") +INST(ST2_sngl_2, "ST2 (single structure)", "0Q001101101mmmmmoo0Szznnnnnttttt") +INST(ST4_sngl_2, "ST4 (single structure)", "0Q001101101mmmmmoo1Szznnnnnttttt") +INST(LD1_sngl_2, "LD1 (single structure)", "0Q001101110mmmmmoo0Szznnnnnttttt") +INST(LD3_sngl_2, "LD3 (single structure)", "0Q001101110mmmmmoo1Szznnnnnttttt") +INST(LD2_sngl_2, "LD2 (single structure)", "0Q001101111mmmmmoo0Szznnnnnttttt") +INST(LD4_sngl_2, "LD4 (single structure)", "0Q001101111mmmmmoo1Szznnnnnttttt") +INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt") +INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt") +INST(UnallocatedEncoding, "", "111110001-0----------1----------") +INST(LDTRSB, "LDTRSB", "00111000oo0iiiiiiiii10nnnnnttttt") +INST(LDTRSH, "LDTRSH", "01111000oo0iiiiiiiii10nnnnnttttt") +INST(STTR, "STTR", "zz111000000iiiiiiiii10nnnnnttttt") +INST(LDTR, "LDTR", "zz111000010iiiiiiiii10nnnnnttttt") +INST(MADD, "MADD", "z0011011000mmmmm0aaaaannnnnddddd") +INST(MSUB, "MSUB", "z0011011000mmmmm1aaaaannnnnddddd") +INST(XAR, "XAR", "11001110100mmmmmiiiiiinnnnnddddd") +INST(FCSEL_float, "FCSEL", "00011110yy1mmmmmcccc11nnnnnddddd") +INST(STx_mult_2, "STx (multiple structures)", "0Q001100100mmmmmoooozznnnnnttttt") +INST(LDx_mult_2, "LDx (multiple structures)", "0Q001100110mmmmmoooozznnnnnttttt") +INST(PRFM_imm, "PRFM (immediate)", "1111100110iiiiiiiiiiiinnnnnttttt") +INST(STUR_fpsimd, "STUR (SIMD&FP)", "zz111100o00iiiiiiiii00nnnnnttttt") +INST(LDUR_fpsimd, "LDUR (SIMD&FP)", "zz111100o10iiiiiiiii00nnnnnttttt") +INST(UnallocatedEncoding, "", "1111100111----------------------") +INST(UnallocatedEncoding, "", "1011100111----------------------") +INST(STRx_reg, "STRx (register)", "zz111000o01mmmmmxxxS10nnnnnttttt") +INST(LDRx_reg, "LDRx (register)", "zz111000o11mmmmmxxxS10nnnnnttttt") +INST(STR_reg_fpsimd, "STR (register, SIMD&FP)", "zz111100o01mmmmmxxxS10nnnnnttttt") +INST(LDR_reg_fpsimd, "LDR (register, SIMD&FP)", "zz111100o11mmmmmxxxS10nnnnnttttt") +INST(ADD_ext, "ADD (extended register)", "z0001011001mmmmmxxxiiinnnnnddddd") +INST(ADDS_ext, "ADDS (extended register)", "z0101011001mmmmmxxxiiinnnnnddddd") +INST(SUB_ext, "SUB (extended register)", "z1001011001mmmmmxxxiiinnnnnddddd") +INST(SUBS_ext, "SUBS (extended register)", "z1101011001mmmmmxxxiiinnnnnddddd") +INST(FCMLA_elt, "FCMLA (by element)", "0Q101111zzLMmmmm0rr1H0nnnnnddddd") +INST(FMADD_float, "FMADD", "00011111yy0mmmmm0aaaaannnnnddddd") +INST(FMSUB_float, "FMSUB", "00011111yy0mmmmm1aaaaannnnnddddd") +INST(FNMADD_float, "FNMADD", "00011111yy1mmmmm0aaaaannnnnddddd") +INST(FNMSUB_float, "FNMSUB", "00011111yy1mmmmm1aaaaannnnnddddd") +INST(EXTR, "EXTR", "z00100111N0mmmmmssssssnnnnnddddd") +INST(B_cond, "B.cond", "01010100iiiiiiiiiiiiiiiiiii0cccc") +INST(STURx_LDURx, "STURx/LDURx", "zz111000oo0iiiiiiiii00nnnnnttttt") +INST(STR_imm_fpsimd_1, "STR (immediate, SIMD&FP)", "zz111100o00iiiiiiiiip1nnnnnttttt") +INST(LDR_imm_fpsimd_1, "LDR (immediate, SIMD&FP)", "zz111100o10iiiiiiiiip1nnnnnttttt") +INST(AND_imm, "AND (immediate)", "z00100100Nrrrrrrssssssnnnnnddddd") +INST(ORR_imm, "ORR (immediate)", "z01100100Nrrrrrrssssssnnnnnddddd") +INST(EOR_imm, "EOR (immediate)", "z10100100Nrrrrrrssssssnnnnnddddd") +INST(ANDS_imm, "ANDS (immediate)", "z11100100Nrrrrrrssssssnnnnnddddd") +INST(MOVN, "MOVN", "z00100101ssiiiiiiiiiiiiiiiiddddd") +INST(MOVZ, "MOVZ", "z10100101ssiiiiiiiiiiiiiiiiddddd") +INST(MOVK, "MOVK", "z11100101ssiiiiiiiiiiiiiiiiddddd") +INST(SBFM, "SBFM", "z00100110Nrrrrrrssssssnnnnnddddd") +INST(BFM, "BFM", "z01100110Nrrrrrrssssssnnnnnddddd") +INST(UBFM, "UBFM", "z10100110Nrrrrrrssssssnnnnnddddd") +INST(LDRSW_lit, "LDRSW (literal)", "10011000iiiiiiiiiiiiiiiiiiittttt") +INST(PRFM_lit, "PRFM (literal)", "11011000iiiiiiiiiiiiiiiiiiittttt") +INST(STNP_LDNP_gen, "STNP/LDNP", "o01010000Liiiiiiiuuuuunnnnnttttt") +INST(STRx_LDRx_imm_1, "STRx/LDRx (immediate)", "zz111000oo0iiiiiiiiip1nnnnnttttt") +INST(AND_shift, "AND (shifted register)", "z0001010ss0mmmmmiiiiiinnnnnddddd") +INST(BIC_shift, "BIC (shifted register)", "z0001010ss1mmmmmiiiiiinnnnnddddd") +INST(ORR_shift, "ORR (shifted register)", "z0101010ss0mmmmmiiiiiinnnnnddddd") +INST(ORN_shift, "ORN (shifted register)", "z0101010ss1mmmmmiiiiiinnnnnddddd") +INST(EOR_shift, "EOR (shifted register)", "z1001010ss0mmmmmiiiiiinnnnnddddd") +INST(EON, "EON (shifted register)", "z1001010ss1mmmmmiiiiiinnnnnddddd") +INST(ANDS_shift, "ANDS (shifted register)", "z1101010ss0mmmmmiiiiiinnnnnddddd") +INST(BICS, "BICS (shifted register)", "z1101010ss1mmmmmiiiiiinnnnnddddd") +INST(ADD_shift, "ADD (shifted register)", "z0001011ss0mmmmmiiiiiinnnnnddddd") +INST(ADDS_shift, "ADDS (shifted register)", "z0101011ss0mmmmmiiiiiinnnnnddddd") +INST(SUB_shift, "SUB (shifted register)", "z1001011ss0mmmmmiiiiiinnnnnddddd") +INST(SUBS_shift, "SUBS (shifted register)", "z1101011ss0mmmmmiiiiiinnnnnddddd") +INST(ADD_imm, "ADD (immediate)", "z0010001ssiiiiiiiiiiiinnnnnddddd") +INST(ADDS_imm, "ADDS (immediate)", "z0110001ssiiiiiiiiiiiinnnnnddddd") +INST(SUB_imm, "SUB (immediate)", "z1010001ssiiiiiiiiiiiinnnnnddddd") +INST(SUBS_imm, "SUBS (immediate)", "z1110001ssiiiiiiiiiiiinnnnnddddd") +INST(CBZ, "CBZ", "z0110100iiiiiiiiiiiiiiiiiiittttt") +INST(CBNZ, "CBNZ", "z0110101iiiiiiiiiiiiiiiiiiittttt") +INST(TBZ, "TBZ", "b0110110bbbbbiiiiiiiiiiiiiittttt") +INST(TBNZ, "TBNZ", "b0110111bbbbbiiiiiiiiiiiiiittttt") +INST(LDR_lit_gen, "LDR (literal)", "0z011000iiiiiiiiiiiiiiiiiiittttt") +INST(STNP_LDNP_fpsimd, "STNP/LDNP (SIMD&FP)", "oo1011000Liiiiiiiuuuuunnnnnttttt") +INST(UnallocatedEncoding, "", "--1010000-----------------------") +INST(UnallocatedEncoding, "", "--1011000-----------------------") +INST(STR_imm_fpsimd_2, "STR (immediate, SIMD&FP)", "zz111101o0iiiiiiiiiiiinnnnnttttt") +INST(LDR_imm_fpsimd_2, "LDR (immediate, SIMD&FP)", "zz111101o1iiiiiiiiiiiinnnnnttttt") +INST(ADR, "ADR", "0ii10000iiiiiiiiiiiiiiiiiiiddddd") +INST(ADRP, "ADRP", "1ii10000iiiiiiiiiiiiiiiiiiiddddd") +INST(B_uncond, "B", "000101iiiiiiiiiiiiiiiiiiiiiiiiii") +INST(BL, "BL", "100101iiiiiiiiiiiiiiiiiiiiiiiiii") +INST(LDR_lit_fpsimd, "LDR (literal, SIMD&FP)", "oo011100iiiiiiiiiiiiiiiiiiittttt") +INST(STRx_LDRx_imm_2, "STRx/LDRx (immediate)", "zz111001ooiiiiiiiiiiiinnnnnttttt") +INST(STP_LDP_gen, "STP/LDP", "oo10100pwLiiiiiiiuuuuunnnnnttttt") +INST(STP_LDP_fpsimd, "STP/LDP (SIMD&FP)", "oo10110pwLiiiiiiiuuuuunnnnnttttt") diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/a64_translate.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/a64_translate.cpp index 6778d13890..4afce6bd29 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/a64_translate.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/a64_translate.cpp @@ -25,7 +25,11 @@ void Translate(IR::Block& block, LocationDescriptor descriptor, MemoryReadCodeFu const u64 pc = visitor.ir.current_location->PC(); if (const auto instruction = memory_read_code(pc)) { auto decoder = Decode(*instruction); - should_continue = decoder.get().call(visitor, *instruction); + if (decoder) { + should_continue = decoder->get().call(visitor, *instruction); + } else { + should_continue = visitor.RaiseException(Exception::UnallocatedEncoding); + } } else { should_continue = visitor.RaiseException(Exception::NoExecuteFault); } @@ -45,13 +49,15 @@ bool TranslateSingleInstruction(IR::Block& block, LocationDescriptor descriptor, bool should_continue = true; auto const decoder = Decode(instruction); - should_continue = decoder.get().call(visitor, instruction); + if (decoder) { + should_continue = decoder->get().call(visitor, instruction); + } else { + should_continue = false; + } visitor.ir.current_location = visitor.ir.current_location->AdvancePC(4); block.CycleCount()++; - block.SetEndLocation(*visitor.ir.current_location); - return should_continue; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/impl.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/impl.cpp index de63f560bc..f3ecd7c604 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/impl.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/impl.cpp @@ -8,9 +8,7 @@ #include "dynarmic/frontend/A64/translate/impl/impl.h" -#include -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/ir/terminal.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_copy.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_copy.cpp index b33bc8f5ad..9354b54fa5 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_copy.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_copy.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A64/translate/impl/impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_modified_immediate.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_modified_immediate.cpp index db8e83631a..2afece1c46 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_modified_immediate.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_modified_immediate.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A64/translate/impl/impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp index 5d60cb31c3..332eb35ebe 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/frontend/A64/translate/impl/impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp index d551605bda..ad5b89df3a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_scalar_three_same.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD @@ -5,7 +8,7 @@ #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/frontend/A64/translate/impl/impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp index 559721a22a..33debd1062 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD */ -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/frontend/A64/translate/impl/impl.h" diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 0d9da6169c..3ab360c287 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -13,8 +13,8 @@ #include #include "dynarmic/common/assert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" +#include "dynarmic/mcl/function_info.hpp" namespace Dynarmic::Decoder { namespace detail { @@ -75,11 +75,11 @@ struct detail { /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { + //static_assert(N > 0, "unexpected field"); std::array masks = {}; std::array shifts = {}; size_t arg_index = 0; char ch = 0; - for (size_t i = 0; i < opcode_bitsize; i++) { if (bitstring[i] == '0' || bitstring[i] == '1' || bitstring[i] == '-') { if (ch != 0) { @@ -93,17 +93,10 @@ struct detail { ch = bitstring[i]; arg_index++; } - - if constexpr (N > 0) { - const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) - throw std::out_of_range("Unexpected field"); - - masks[arg_index] |= static_cast(1) << bit_position; - shifts[arg_index] = bit_position; - } else { - throw std::out_of_range("Unexpected field"); - } + const size_t bit_position = opcode_bitsize - i - 1; + //static_assert(arg_index >= N, "unexpected field"); + masks[arg_index] |= opcode_type(1) << bit_position; + shifts[arg_index] = bit_position; } } #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) diff --git a/src/dynarmic/src/dynarmic/frontend/imm.cpp b/src/dynarmic/src/dynarmic/frontend/imm.cpp index 95e24206d7..aeb7b5d3f6 100644 --- a/src/dynarmic/src/dynarmic/frontend/imm.cpp +++ b/src/dynarmic/src/dynarmic/frontend/imm.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -9,7 +9,7 @@ #include "dynarmic/frontend/imm.h" #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" namespace Dynarmic { diff --git a/src/dynarmic/src/dynarmic/frontend/imm.h b/src/dynarmic/src/dynarmic/frontend/imm.h index 4e277ede3b..3a6c10316a 100644 --- a/src/dynarmic/src/dynarmic/frontend/imm.h +++ b/src/dynarmic/src/dynarmic/frontend/imm.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -11,8 +11,7 @@ #include #include "dynarmic/common/assert.h" -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/math_util.h" diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index 4044005bd0..bbf1319957 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include "dynarmic/mcl/intrusive_list.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/ir/location_descriptor.h" diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.h b/src/dynarmic/src/dynarmic/ir/ir_emitter.h index d146253e34..ed95c8b5be 100644 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.h +++ b/src/dynarmic/src/dynarmic/ir/ir_emitter.h @@ -12,7 +12,7 @@ #include "dynarmic/common/common_types.h" #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/ir/opcodes.h" #include "dynarmic/ir/acc_type.h" diff --git a/src/dynarmic/src/dynarmic/ir/microinstruction.h b/src/dynarmic/src/dynarmic/ir/microinstruction.h index 1700eb110b..fb3ac1f49a 100644 --- a/src/dynarmic/src/dynarmic/ir/microinstruction.h +++ b/src/dynarmic/src/dynarmic/ir/microinstruction.h @@ -10,7 +10,7 @@ #include -#include +#include "dynarmic/mcl/intrusive_list.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/ir/value.h" diff --git a/src/dynarmic/src/dynarmic/ir/opcodes.inc b/src/dynarmic/src/dynarmic/ir/opcodes.inc index cc0b87627f..b1ba5b2993 100644 --- a/src/dynarmic/src/dynarmic/ir/opcodes.inc +++ b/src/dynarmic/src/dynarmic/ir/opcodes.inc @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // First we list common shared opcodes // Since we give priority to A64 performance, we include them first, this is so we // can discard all A32 opcodes instead of having a "hole" in our checks @@ -47,8 +50,8 @@ OPCODE(LogicalShiftRight64, U64, U64, OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 ) OPCODE(ArithmeticShiftRight64, U64, U64, U8 ) // windows.h defines RotateRight64 and RotateRight32 -OPCODE(BitRotateRight32, U32, U32, U8, U1 ) -OPCODE(BitRotateRight64, U64, U64, U8 ) +OPCODE(BitRotateRight32, U32, U32, U8, U1 ) +OPCODE(BitRotateRight64, U64, U64, U8 ) OPCODE(RotateRightExtended, U32, U32, U1 ) OPCODE(LogicalShiftLeftMasked32, U32, U32, U32 ) OPCODE(LogicalShiftLeftMasked64, U64, U64, U64 ) @@ -710,6 +713,8 @@ A64OPC(ExclusiveWriteMemory32, U32, U64, A64OPC(ExclusiveWriteMemory64, U32, U64, U64, U64, AccType ) A64OPC(ExclusiveWriteMemory128, U32, U64, U64, U128, AccType ) +// Remember to update: +// - a32_emit_x64.cpp // A32 Context getters/setters A32OPC(SetCheckBit, Void, U1 ) diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 87f0d908e1..999d4c49bc 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -27,8 +27,8 @@ #include "dynarmic/ir/opcodes.h" #include "dynarmic/ir/opt_passes.h" #include "dynarmic/ir/type.h" -#include "mcl/bit/swap.hpp" -#include "mcl/bit/rotate.hpp" +#include "dynarmic/mcl/bit.hpp" +#include "dynarmic/mcl/bit.hpp" namespace Dynarmic::Optimization { diff --git a/src/dynarmic/src/dynarmic/ir/value.cpp b/src/dynarmic/src/dynarmic/ir/value.cpp index 59b17b5a7b..451036b1fd 100644 --- a/src/dynarmic/src/dynarmic/ir/value.cpp +++ b/src/dynarmic/src/dynarmic/ir/value.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -9,7 +9,7 @@ #include "dynarmic/ir/value.h" #include "dynarmic/common/assert.h" -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" diff --git a/src/dynarmic/src/dynarmic/mcl/bit.hpp b/src/dynarmic/src/dynarmic/mcl/bit.hpp new file mode 100644 index 0000000000..1ef9880a5f --- /dev/null +++ b/src/dynarmic/src/dynarmic/mcl/bit.hpp @@ -0,0 +1,311 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2022 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +#include "dynarmic/common/common_types.h" +#include "dynarmic/common/assert.h" + +namespace mcl { +namespace detail { +template +concept SameHelper = std::is_same_v; +} // namespace detail +template +concept SameAs = detail::SameHelper && detail::SameHelper; +template +concept IsAnyOf = (SameAs || ...); +/// Integral upon which bit operations can be safely performed. +template +concept BitIntegral = IsAnyOf; + +template +constexpr std::size_t bitsizeof = CHAR_BIT * sizeof(T); + +} // namespace mcl + +namespace mcl::bit { + +constexpr u16 swap_bytes_16(u16 value) { + return static_cast(u32{value} >> 8 | u32{value} << 8); +} + +constexpr u32 swap_bytes_32(u32 value) { + return ((value & 0xff000000u) >> 24) + | ((value & 0x00ff0000u) >> 8) + | ((value & 0x0000ff00u) << 8) + | ((value & 0x000000ffu) << 24); +} + +constexpr u64 swap_bytes_64(u64 value) { + return ((value & 0xff00000000000000ull) >> 56) + | ((value & 0x00ff000000000000ull) >> 40) + | ((value & 0x0000ff0000000000ull) >> 24) + | ((value & 0x000000ff00000000ull) >> 8) + | ((value & 0x00000000ff000000ull) << 8) + | ((value & 0x0000000000ff0000ull) << 24) + | ((value & 0x000000000000ff00ull) << 40) + | ((value & 0x00000000000000ffull) << 56); +} + +constexpr u32 swap_halves_32(u32 value) { + return ((value & 0xffff0000u) >> 16) + | ((value & 0x0000ffffu) << 16); +} + +constexpr u64 swap_halves_64(u64 value) { + return ((value & 0xffff000000000000ull) >> 48) + | ((value & 0x0000ffff00000000ull) >> 16) + | ((value & 0x00000000ffff0000ull) << 16) + | ((value & 0x000000000000ffffull) << 48); +} + +constexpr u64 swap_words_64(u64 value) { + return ((value & 0xffffffff00000000ull) >> 32) + | ((value & 0x00000000ffffffffull) << 32); +} + +template +constexpr T rotate_right(T x, size_t amount) { + amount %= bitsizeof; + if (amount == 0) { + return x; + } + return static_cast((x >> amount) | (x << (bitsizeof - amount))); +} + +template +constexpr T rotate_left(T x, size_t amount) { + amount %= bitsizeof; + if (amount == 0) { + return x; + } + return static_cast((x << amount) | (x >> (bitsizeof - amount))); +} + +/// Create a mask with `count` number of one bits. +template +constexpr T ones() { + static_assert(count <= bitsizeof, "count larger than bitsize of T"); + + if constexpr (count == 0) { + return 0; + } else { + return static_cast(~static_cast(0)) >> (bitsizeof - count); + } +} + +/// Create a mask with `count` number of one bits. +template +constexpr T ones(size_t count) { + ASSERT(count <= bitsizeof && "count larger than bitsize of T"); + if (count == 0) { + return 0; + } + return static_cast(~static_cast(0)) >> (bitsizeof - count); +} + +/// Create a mask of type T for bits [begin_bit, end_bit] inclusive. +template +constexpr T mask() { + static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)"); + static_assert(begin_bit < bitsizeof, "begin_bit must be smaller than size of T"); + static_assert(end_bit < bitsizeof, "end_bit must be smaller than size of T"); + + return ones() << begin_bit; +} + +/// Create a mask of type T for bits [begin_bit, end_bit] inclusive. +template +constexpr T mask(size_t begin_bit, size_t end_bit) { + ASSERT(begin_bit <= end_bit && "invalid bit range (position of beginning bit cannot be greater than that of end bit)"); + ASSERT(begin_bit < bitsizeof && "begin_bit must be smaller than size of T"); + ASSERT(end_bit < bitsizeof && "end_bit must be smaller than size of T"); + return ones(end_bit - begin_bit + 1) << begin_bit; +} + +/// Extract bits [begin_bit, end_bit] inclusive from value of type T. +template +constexpr T get_bits(T value) { + constexpr T m = mask(); + return (value & m) >> begin_bit; +} + +/// Extract bits [begin_bit, end_bit] inclusive from value of type T. +template +constexpr T get_bits(size_t begin_bit, size_t end_bit, T value) { + const T m = mask(begin_bit, end_bit); + return (value & m) >> begin_bit; +} + +/// Clears bits [begin_bit, end_bit] inclusive of value of type T. +template +constexpr T clear_bits(T value) { + constexpr T m = mask(); + return value & ~m; +} + +/// Clears bits [begin_bit, end_bit] inclusive of value of type T. +template +constexpr T clear_bits(size_t begin_bit, size_t end_bit, T value) { + const T m = mask(begin_bit, end_bit); + return value & ~m; +} + +/// Modifies bits [begin_bit, end_bit] inclusive of value of type T. +template +constexpr T set_bits(T value, T new_bits) { + constexpr T m = mask(); + return (value & ~m) | ((new_bits << begin_bit) & m); +} + +/// Modifies bits [begin_bit, end_bit] inclusive of value of type T. +template +constexpr T set_bits(size_t begin_bit, size_t end_bit, T value, T new_bits) { + const T m = mask(begin_bit, end_bit); + return (value & ~m) | ((new_bits << begin_bit) & m); +} + +/// Extract bit at bit_position from value of type T. +template +constexpr bool get_bit(T value) { + constexpr T m = mask(); + return (value & m) != 0; +} + +/// Extract bit at bit_position from value of type T. +template +constexpr bool get_bit(size_t bit_position, T value) { + const T m = mask(bit_position, bit_position); + return (value & m) != 0; +} + +/// Clears bit at bit_position of value of type T. +template +constexpr T clear_bit(T value) { + constexpr T m = mask(); + return value & ~m; +} + +/// Clears bit at bit_position of value of type T. +template +constexpr T clear_bit(size_t bit_position, T value) { + const T m = mask(bit_position, bit_position); + return value & ~m; +} + +/// Modifies bit at bit_position of value of type T. +template +constexpr T set_bit(T value, bool new_bit) { + constexpr T m = mask(); + return (value & ~m) | (new_bit ? m : static_cast(0)); +} + +/// Modifies bit at bit_position of value of type T. +template +constexpr T set_bit(size_t bit_position, T value, bool new_bit) { + const T m = mask(bit_position, bit_position); + return (value & ~m) | (new_bit ? m : static_cast(0)); +} + +/// Sign-extends a value that has bit_count bits to the full bitwidth of type T. +template +constexpr T sign_extend(T value) { + static_assert(bit_count != 0, "cannot sign-extend zero-sized value"); + + using S = std::make_signed_t; + constexpr size_t shift_amount = bitsizeof - bit_count; + return static_cast(static_cast(value << shift_amount) >> shift_amount); +} + +/// Sign-extends a value that has bit_count bits to the full bitwidth of type T. +template +constexpr T sign_extend(size_t bit_count, T value) { + ASSERT(bit_count != 0 && "cannot sign-extend zero-sized value"); + using S = std::make_signed_t; + const size_t shift_amount = bitsizeof - bit_count; + return T(S(value << shift_amount) >> shift_amount); +} + +/// Replicate an element across a value of type T. +template +constexpr T replicate_element(T value) { + static_assert(element_size <= bitsizeof, "element_size is too large"); + static_assert(bitsizeof % element_size == 0, "bitsize of T not divisible by element_size"); + + if constexpr (element_size == bitsizeof) { + return value; + } else { + return replicate_element(static_cast(value | (value << element_size))); + } +} + +/// Replicate an element of type U across a value of type T. +template +constexpr T replicate_element(T value) { + static_assert(bitsizeof <= bitsizeof, "element_size is too large"); + + return replicate_element, T>(value); +} + +/// Replicate an element across a value of type T. +template +constexpr T replicate_element(size_t element_size, T value) { + ASSERT(element_size <= bitsizeof && "element_size is too large"); + ASSERT(bitsizeof % element_size == 0 && "bitsize of T not divisible by element_size"); + if (element_size == bitsizeof) + return value; + return replicate_element(element_size * 2, static_cast(value | (value << element_size))); +} + +template +constexpr bool most_significant_bit(T value) { + return get_bit - 1, T>(value); +} + +template +inline size_t count_ones(T x) { + return std::bitset>(x).count(); +} + +template +constexpr size_t count_leading_zeros(T x) { + size_t result = bitsizeof; + while (x != 0) { + x >>= 1; + result--; + } + return result; +} + +template +constexpr int highest_set_bit(T x) { + int result = -1; + while (x != 0) { + x >>= 1; + result++; + } + return result; +} + +template +constexpr size_t lowest_set_bit(T x) { + if (x == 0) { + return bitsizeof; + } + + size_t result = 0; + while ((x & 1) == 0) { + x >>= 1; + result++; + } + return result; +} + +} // namespace mcl::bit diff --git a/src/dynarmic/src/dynarmic/mcl/function_info.hpp b/src/dynarmic/src/dynarmic/mcl/function_info.hpp new file mode 100644 index 0000000000..452bd88bfc --- /dev/null +++ b/src/dynarmic/src/dynarmic/mcl/function_info.hpp @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2022 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +namespace mcl { + +/// Contains a list of types +template +struct type_list {}; + +template +struct function_info : function_info {}; + +template +struct function_info { + using return_type = R; + using parameter_list = type_list; + static constexpr std::size_t parameter_count = sizeof...(As); + + using equivalent_function_type = R(As...); + + template + struct parameter { + static_assert(I < parameter_count, "Non-existent parameter"); + using type = std::tuple_element_t>; + }; +}; + +template +struct function_info : function_info {}; + +template +struct function_info : function_info { + using class_type = C; + + using equivalent_function_type_with_class = R(C*, As...); +}; + +template +struct function_info : function_info { + using class_type = C; + + using equivalent_function_type_with_class = R(C*, As...); +}; + +template +constexpr size_t parameter_count_v = function_info::parameter_count; + +template +using parameter_list = typename function_info::parameter_list; + +template +using get_parameter = typename function_info::template parameter::type; + +template +using equivalent_function_type = typename function_info::equivalent_function_type; + +template +using equivalent_function_type_with_class = typename function_info::equivalent_function_type_with_class; + +template +using return_type = typename function_info::return_type; + +template +using class_type = typename function_info::class_type; + +} // namespace mcl diff --git a/src/dynarmic/src/dynarmic/mcl/integer_of_size.hpp b/src/dynarmic/src/dynarmic/mcl/integer_of_size.hpp new file mode 100644 index 0000000000..8bdecc955d --- /dev/null +++ b/src/dynarmic/src/dynarmic/mcl/integer_of_size.hpp @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2022 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include "dynarmic/common/common_types.h" + +namespace mcl { + +namespace detail { + +template +struct integer_of_size_impl {}; + +template<> +struct integer_of_size_impl<8> { + using unsigned_type = u8; + using signed_type = s8; +}; + +template<> +struct integer_of_size_impl<16> { + using unsigned_type = u16; + using signed_type = s16; +}; + +template<> +struct integer_of_size_impl<32> { + using unsigned_type = u32; + using signed_type = s32; +}; + +template<> +struct integer_of_size_impl<64> { + using unsigned_type = u64; + using signed_type = s64; +}; + +} // namespace detail + +template +using unsigned_integer_of_size = typename detail::integer_of_size_impl::unsigned_type; + +template +using signed_integer_of_size = typename detail::integer_of_size_impl::signed_type; + +} // namespace mcl diff --git a/src/dynarmic/src/dynarmic/mcl/intrusive_list.hpp b/src/dynarmic/src/dynarmic/mcl/intrusive_list.hpp new file mode 100644 index 0000000000..3b1c1d6699 --- /dev/null +++ b/src/dynarmic/src/dynarmic/mcl/intrusive_list.hpp @@ -0,0 +1,408 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2022 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include "dynarmic/common/assert.h" + +namespace mcl { + +template +class intrusive_list; +template +class intrusive_list_iterator; + +template +class intrusive_list_node { +public: + bool is_sentinel() const + { + return is_sentinel_; + } + +protected: + intrusive_list_node* next = nullptr; + intrusive_list_node* prev = nullptr; + bool is_sentinel_ = false; + + friend class intrusive_list; + friend class intrusive_list_iterator; + friend class intrusive_list_iterator; +}; + +template +class intrusive_list_sentinel final : public intrusive_list_node { + using intrusive_list_node::next; + using intrusive_list_node::prev; + using intrusive_list_node::is_sentinel_; + +public: + intrusive_list_sentinel() + { + next = this; + prev = this; + is_sentinel_ = true; + } +}; + +template +class intrusive_list_iterator { +public: + using iterator_category = std::bidirectional_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = T; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + + // If value_type is const, we want "const intrusive_list_node", not "intrusive_list_node" + using node_type = std::conditional_t::value, + const intrusive_list_node>, + intrusive_list_node>; + using node_pointer = node_type*; + using node_reference = node_type&; + + intrusive_list_iterator() = default; + intrusive_list_iterator(const intrusive_list_iterator& other) = default; + intrusive_list_iterator& operator=(const intrusive_list_iterator& other) = default; + + explicit intrusive_list_iterator(node_pointer list_node) + : node(list_node) {} + explicit intrusive_list_iterator(pointer data) + : node(data) {} + explicit intrusive_list_iterator(reference data) + : node(&data) {} + + intrusive_list_iterator& operator++() + { + node = node->next; + return *this; + } + intrusive_list_iterator& operator--() + { + node = node->prev; + return *this; + } + intrusive_list_iterator operator++(int) + { + intrusive_list_iterator it(*this); + ++*this; + return it; + } + intrusive_list_iterator operator--(int) + { + intrusive_list_iterator it(*this); + --*this; + return it; + } + + bool operator==(const intrusive_list_iterator& other) const + { + return node == other.node; + } + bool operator!=(const intrusive_list_iterator& other) const + { + return !operator==(other); + } + + reference operator*() const + { + DEBUG_ASSERT(!node->is_sentinel()); + return static_cast(*node); + } + pointer operator->() const + { + return std::addressof(operator*()); + } + + node_pointer AsNodePointer() const + { + return node; + } + +private: + friend class intrusive_list; + node_pointer node = nullptr; +}; + +template +class intrusive_list { +public: + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + using value_type = T; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = intrusive_list_iterator; + using const_iterator = intrusive_list_iterator; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + + /** + * Inserts a node at the given location indicated by an iterator. + * + * @param location The location to insert the node. + * @param new_node The node to add. + */ + iterator insert(iterator location, pointer new_node) + { + return insert_before(location, new_node); + } + + /** + * Inserts a node at the given location, moving the previous + * node occupant ahead of the one inserted. + * + * @param location The location to insert the new node. + * @param new_node The node to insert into the list. + */ + iterator insert_before(iterator location, pointer new_node) + { + auto existing_node = location.AsNodePointer(); + + new_node->next = existing_node; + new_node->prev = existing_node->prev; + existing_node->prev->next = new_node; + existing_node->prev = new_node; + + return iterator(new_node); + } + + /** + * Inserts a new node into the list ahead of the position indicated. + * + * @param position Location to insert the node in front of. + * @param new_node The node to be inserted into the list. + */ + iterator insert_after(iterator position, pointer new_node) + { + if (empty()) + return insert(begin(), new_node); + + return insert(++position, new_node); + } + + /** + * Add an entry to the start of the list. + * @param node Node to add to the list. + */ + void push_front(pointer node) + { + insert(begin(), node); + } + + /** + * Add an entry to the end of the list + * @param node Node to add to the list. + */ + void push_back(pointer node) + { + insert(end(), node); + } + + /** + * Erases the node at the front of the list. + * @note Must not be called on an empty list. + */ + void pop_front() + { + DEBUG_ASSERT(!empty()); + erase(begin()); + } + + /** + * Erases the node at the back of the list. + * @note Must not be called on an empty list. + */ + void pop_back() + { + DEBUG_ASSERT(!empty()); + erase(--end()); + } + + /** + * Removes a node from this list + * @param it An iterator that points to the node to remove from list. + */ + pointer remove(iterator& it) + { + DEBUG_ASSERT(it != end()); + + pointer node = &*it++; + + node->prev->next = node->next; + node->next->prev = node->prev; +#if !defined(NDEBUG) + node->next = nullptr; + node->prev = nullptr; +#endif + + return node; + } + + /** + * Removes a node from this list + * @param it A constant iterator that points to the node to remove from list. + */ + pointer remove(const iterator& it) + { + iterator copy = it; + return remove(copy); + } + + /** + * Removes a node from this list. + * @param node A pointer to the node to remove. + */ + pointer remove(pointer node) + { + return remove(iterator(node)); + } + + /** + * Removes a node from this list. + * @param node A reference to the node to remove. + */ + pointer remove(reference node) + { + return remove(iterator(node)); + } + + /** + * Is this list empty? + * @returns true if there are no nodes in this list. + */ + bool empty() const + { + return root->next == root.get(); + } + + /** + * Gets the total number of elements within this list. + * @return the number of elements in this list. + */ + size_type size() const + { + return static_cast(std::distance(begin(), end())); + } + + /** + * Retrieves a reference to the node at the front of the list. + * @note Must not be called on an empty list. + */ + reference front() + { + DEBUG_ASSERT(!empty()); + return *begin(); + } + + /** + * Retrieves a constant reference to the node at the front of the list. + * @note Must not be called on an empty list. + */ + const_reference front() const + { + DEBUG_ASSERT(!empty()); + return *begin(); + } + + /** + * Retrieves a reference to the node at the back of the list. + * @note Must not be called on an empty list. + */ + reference back() + { + DEBUG_ASSERT(!empty()); + return *--end(); + } + + /** + * Retrieves a constant reference to the node at the back of the list. + * @note Must not be called on an empty list. + */ + const_reference back() const + { + DEBUG_ASSERT(!empty()); + return *--end(); + } + + // Iterator interface + iterator begin() { return iterator(root->next); } + const_iterator begin() const { return const_iterator(root->next); } + const_iterator cbegin() const { return begin(); } + + iterator end() { return iterator(root.get()); } + const_iterator end() const { return const_iterator(root.get()); } + const_iterator cend() const { return end(); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const { return rbegin(); } + + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const { return rend(); } + + /** + * Erases a node from the list, indicated by an iterator. + * @param it The iterator that points to the node to erase. + */ + iterator erase(iterator it) + { + remove(it); + return it; + } + + /** + * Erases a node from this list. + * @param node A pointer to the node to erase from this list. + */ + iterator erase(pointer node) + { + return erase(iterator(node)); + } + + /** + * Erases a node from this list. + * @param node A reference to the node to erase from this list. + */ + iterator erase(reference node) + { + return erase(iterator(node)); + } + + /** + * Exchanges contents of this list with another list instance. + * @param other The other list to swap with. + */ + void swap(intrusive_list& other) noexcept + { + root.swap(other.root); + } + +private: + std::shared_ptr> root = std::make_shared>(); +}; + +/** + * Exchanges contents of an intrusive list with another intrusive list. + * @tparam T The type of data being kept track of by the lists. + * @param lhs The first list. + * @param rhs The second list. + */ +template +void swap(intrusive_list& lhs, intrusive_list& rhs) noexcept +{ + lhs.swap(rhs); +} + +} // namespace mcl diff --git a/src/dynarmic/src/dynarmic/mcl/is_instance_of_template.hpp b/src/dynarmic/src/dynarmic/mcl/is_instance_of_template.hpp new file mode 100644 index 0000000000..0cb143430d --- /dev/null +++ b/src/dynarmic/src/dynarmic/mcl/is_instance_of_template.hpp @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2022 merryhime +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +namespace mcl { + +/// A metavalue (of type VT and value v). +template using value = std::integral_constant; +/// A metavalue of type size_t (and value v). +template using size_value = value; +/// A metavalue of type bool (and value v). (Aliases to std::bool_constant.) +template using bool_value = value; +/// true metavalue (Aliases to std::true_type). +using true_type = bool_value; +/// false metavalue (Aliases to std::false_type). +using false_type = bool_value; + +/// Is type T an instance of template class C? +template class, class> +struct is_instance_of_template : false_type {}; + +template class C, class... As> +struct is_instance_of_template> : true_type {}; + +/// Is type T an instance of template class C? +template class C, class T> +constexpr bool is_instance_of_template_v = is_instance_of_template::value; + +} // namespace mcl diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index fd17b3bd01..e9834a6663 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -15,20 +15,17 @@ #include #include -#include -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" -#include "../fuzz_util.h" -#include "../rand_int.h" -#include "../unicorn_emu/a32_unicorn.h" -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/fuzz_util.h" +#include "dynarmic/tests/rand_int.h" +#include "dynarmic/tests/unicorn_emu/a32_unicorn.h" +#include "dynarmic/tests/A32/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/frontend/A32/ITState.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" @@ -357,67 +354,67 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit, uni_env.ticks_left = instructions.size(); // Unicorn counts thumb instructions weirdly. uni.Run(); - SCOPE_FAIL { - fmt::print("Instruction Listing:\n"); - fmt::print("{}\n", Common::DisassembleAArch32(std::is_same_v, initial_pc, (const u8*)instructions.data(), instructions.size() * sizeof(instructions[0]))); + // SCOPE_FAIL { + // fmt::print("Instruction Listing:\n"); + // fmt::print("{}\n", Common::DisassembleAArch32(std::is_same_v, initial_pc, (const u8*)instructions.data(), instructions.size() * sizeof(instructions[0]))); - fmt::print("Initial register listing:\n"); - for (size_t i = 0; i < regs.size(); ++i) { - fmt::print("{:3s}: {:08x}\n", static_cast(i), regs[i]); - } - for (size_t i = 0; i < vecs.size(); ++i) { - fmt::print("{:3s}: {:08x}\n", static_cast(i), vecs[i]); - } - fmt::print("cpsr {:08x}\n", cpsr); - fmt::print("fpcr {:08x}\n", fpscr); - fmt::print("fpcr.AHP {}\n", FP::FPCR{fpscr}.AHP()); - fmt::print("fpcr.DN {}\n", FP::FPCR{fpscr}.DN()); - fmt::print("fpcr.FZ {}\n", FP::FPCR{fpscr}.FZ()); - fmt::print("fpcr.RMode {}\n", static_cast(FP::FPCR{fpscr}.RMode())); - fmt::print("fpcr.FZ16 {}\n", FP::FPCR{fpscr}.FZ16()); - fmt::print("\n"); + // fmt::print("Initial register listing:\n"); + // for (size_t i = 0; i < regs.size(); ++i) { + // fmt::print("{:3s}: {:08x}\n", static_cast(i), regs[i]); + // } + // for (size_t i = 0; i < vecs.size(); ++i) { + // fmt::print("{:3s}: {:08x}\n", static_cast(i), vecs[i]); + // } + // fmt::print("cpsr {:08x}\n", cpsr); + // fmt::print("fpcr {:08x}\n", fpscr); + // fmt::print("fpcr.AHP {}\n", FP::FPCR{fpscr}.AHP()); + // fmt::print("fpcr.DN {}\n", FP::FPCR{fpscr}.DN()); + // fmt::print("fpcr.FZ {}\n", FP::FPCR{fpscr}.FZ()); + // fmt::print("fpcr.RMode {}\n", static_cast(FP::FPCR{fpscr}.RMode())); + // fmt::print("fpcr.FZ16 {}\n", FP::FPCR{fpscr}.FZ16()); + // fmt::print("\n"); - fmt::print("Final register listing:\n"); - fmt::print(" unicorn dynarmic\n"); - const auto uni_regs = uni.GetRegisters(); - for (size_t i = 0; i < regs.size(); ++i) { - fmt::print("{:3s}: {:08x} {:08x} {}\n", static_cast(i), uni_regs[i], jit.Regs()[i], uni_regs[i] != jit.Regs()[i] ? "*" : ""); - } - const auto uni_ext_regs = uni.GetExtRegs(); - for (size_t i = 0; i < vecs.size(); ++i) { - fmt::print("s{:2d}: {:08x} {:08x} {}\n", static_cast(i), uni_ext_regs[i], jit.ExtRegs()[i], uni_ext_regs[i] != jit.ExtRegs()[i] ? "*" : ""); - } - fmt::print("cpsr {:08x} {:08x} {}\n", uni.GetCpsr(), jit.Cpsr(), uni.GetCpsr() != jit.Cpsr() ? "*" : ""); - fmt::print("fpsr {:08x} {:08x} {}\n", uni.GetFpscr(), jit.Fpscr(), (uni.GetFpscr() & 0xF0000000) != (jit.Fpscr() & 0xF0000000) ? "*" : ""); - fmt::print("\n"); + // fmt::print("Final register listing:\n"); + // fmt::print(" unicorn dynarmic\n"); + // const auto uni_regs = uni.GetRegisters(); + // for (size_t i = 0; i < regs.size(); ++i) { + // fmt::print("{:3s}: {:08x} {:08x} {}\n", static_cast(i), uni_regs[i], jit.Regs()[i], uni_regs[i] != jit.Regs()[i] ? "*" : ""); + // } + // const auto uni_ext_regs = uni.GetExtRegs(); + // for (size_t i = 0; i < vecs.size(); ++i) { + // fmt::print("s{:2d}: {:08x} {:08x} {}\n", static_cast(i), uni_ext_regs[i], jit.ExtRegs()[i], uni_ext_regs[i] != jit.ExtRegs()[i] ? "*" : ""); + // } + // fmt::print("cpsr {:08x} {:08x} {}\n", uni.GetCpsr(), jit.Cpsr(), uni.GetCpsr() != jit.Cpsr() ? "*" : ""); + // fmt::print("fpsr {:08x} {:08x} {}\n", uni.GetFpscr(), jit.Fpscr(), (uni.GetFpscr() & 0xF0000000) != (jit.Fpscr() & 0xF0000000) ? "*" : ""); + // fmt::print("\n"); - fmt::print("Modified memory:\n"); - fmt::print(" uni dyn\n"); - auto uni_iter = uni_env.modified_memory.begin(); - auto jit_iter = jit_env.modified_memory.begin(); - while (uni_iter != uni_env.modified_memory.end() || jit_iter != jit_env.modified_memory.end()) { - if (uni_iter == uni_env.modified_memory.end() || (jit_iter != jit_env.modified_memory.end() && uni_iter->first > jit_iter->first)) { - fmt::print("{:08x}: {:02x} *\n", jit_iter->first, jit_iter->second); - jit_iter++; - } else if (jit_iter == jit_env.modified_memory.end() || jit_iter->first > uni_iter->first) { - fmt::print("{:08x}: {:02x} *\n", uni_iter->first, uni_iter->second); - uni_iter++; - } else if (uni_iter->first == jit_iter->first) { - fmt::print("{:08x}: {:02x} {:02x} {}\n", uni_iter->first, uni_iter->second, jit_iter->second, uni_iter->second != jit_iter->second ? "*" : ""); - uni_iter++; - jit_iter++; - } - } - fmt::print("\n"); + // fmt::print("Modified memory:\n"); + // fmt::print(" uni dyn\n"); + // auto uni_iter = uni_env.modified_memory.begin(); + // auto jit_iter = jit_env.modified_memory.begin(); + // while (uni_iter != uni_env.modified_memory.end() || jit_iter != jit_env.modified_memory.end()) { + // if (uni_iter == uni_env.modified_memory.end() || (jit_iter != jit_env.modified_memory.end() && uni_iter->first > jit_iter->first)) { + // fmt::print("{:08x}: {:02x} *\n", jit_iter->first, jit_iter->second); + // jit_iter++; + // } else if (jit_iter == jit_env.modified_memory.end() || jit_iter->first > uni_iter->first) { + // fmt::print("{:08x}: {:02x} *\n", uni_iter->first, uni_iter->second); + // uni_iter++; + // } else if (uni_iter->first == jit_iter->first) { + // fmt::print("{:08x}: {:02x} {:02x} {}\n", uni_iter->first, uni_iter->second, jit_iter->second, uni_iter->second != jit_iter->second ? "*" : ""); + // uni_iter++; + // jit_iter++; + // } + // } + // fmt::print("\n"); - fmt::print("x86_64:\n"); - fmt::print("{}", jit.Disassemble()); + // fmt::print("x86_64:\n"); + // fmt::print("{}", jit.Disassemble()); - fmt::print("Interrupts:\n"); - for (const auto& i : uni_env.interrupts) { - std::puts(i.c_str()); - } - }; + // fmt::print("Interrupts:\n"); + // for (const auto& i : uni_env.interrupts) { + // std::puts(i.c_str()); + // } + // }; REQUIRE(uni_env.code_mem_modified_by_guest == jit_env.code_mem_modified_by_guest); if (uni_env.code_mem_modified_by_guest) { diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 3a561a18d9..7fef968b95 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -16,13 +16,13 @@ #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" -#include "../rand_int.h" -#include "../unicorn_emu/a32_unicorn.h" -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/rand_int.h" +#include "dynarmic/tests/unicorn_emu/a32_unicorn.h" +#include "dynarmic/tests/A32/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/frontend/A32/FPSCR.h" #include "dynarmic/frontend/A32/PSR.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" @@ -107,7 +107,7 @@ static bool DoesBehaviorMatch(const A32Unicorn& uni, const A32::Ji return std::equal(interp_regs.begin(), interp_regs.end(), jit_regs.begin(), jit_regs.end()) && uni.GetCpsr() == jit.Cpsr() && interp_write_records == jit_write_records; } -static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn& uni, A32::Jit& jit, const ThumbTestEnv::RegisterArray& initial_regs, size_t instruction_count, size_t instructions_to_execute_count) { +static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32::UserConfig const& config, A32Unicorn& uni, A32::Jit& jit, const ThumbTestEnv::RegisterArray& initial_regs, size_t instruction_count, size_t instructions_to_execute_count) { uni.ClearPageCache(); jit.ClearCache(); @@ -145,9 +145,8 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn uni{test_env}; - A32::Jit jit{GetUserConfig(&test_env)}; + A32::UserConfig config{GetUserConfig(&test_env)}; + A32::Jit jit{config}; constexpr ThumbTestEnv::RegisterArray initial_regs{ 0xe90ecd70, @@ -534,5 +539,5 @@ TEST_CASE("Verify fix for off by one error in MemoryRead32 worked", "[Thumb][Thu 0xE7FE, // b +#0 }; - RunInstance(1, test_env, uni, jit, initial_regs, 5, 5); + RunInstance(1, test_env, config, uni, jit, initial_regs, 5, 5); } diff --git a/src/dynarmic/tests/A32/test_arm_instructions.cpp b/src/dynarmic/tests/A32/test_arm_instructions.cpp index 2e7e7dc5d8..5a27cd499c 100644 --- a/src/dynarmic/tests/A32/test_arm_instructions.cpp +++ b/src/dynarmic/tests/A32/test_arm_instructions.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,8 +8,8 @@ #include -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A32/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" diff --git a/src/dynarmic/tests/A32/test_coprocessor.cpp b/src/dynarmic/tests/A32/test_coprocessor.cpp index 3888d2c68b..66ae6ebb09 100644 --- a/src/dynarmic/tests/A32/test_coprocessor.cpp +++ b/src/dynarmic/tests/A32/test_coprocessor.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,8 +10,8 @@ #include -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A32/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/tests/A32/test_svc.cpp b/src/dynarmic/tests/A32/test_svc.cpp index 0be2432c7b..0cfaf23ec5 100644 --- a/src/dynarmic/tests/A32/test_svc.cpp +++ b/src/dynarmic/tests/A32/test_svc.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -10,8 +10,8 @@ #include -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A32/testenv.h" +#include "dynarmic/tests/native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A32/test_thumb_instructions.cpp b/src/dynarmic/tests/A32/test_thumb_instructions.cpp index d509acdd8d..6aa1b7389b 100644 --- a/src/dynarmic/tests/A32/test_thumb_instructions.cpp +++ b/src/dynarmic/tests/A32/test_thumb_instructions.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -9,8 +9,8 @@ #include #include "dynarmic/common/common_types.h" -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A32/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/interface/A32/a32.h" static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index d331c5e8a1..e85986ea5a 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -9,8 +9,8 @@ #include #include -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A64/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" #include "dynarmic/interface/optimization_flags.h" @@ -415,6 +415,105 @@ TEST_CASE("A64: URSHL", "[a64]") { CHECK(jit.GetVector(9) == Vector{0x0000000000000002, 0x12db8b8280e0ba}); } +TEST_CASE("A64: SQSHLU", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SQSHLU(V8.B16(), V0.B16(), 1); + code.SQSHLU(V9.H8(), V1.H8(), 2); + code.SQSHLU(V10.S4(), V2.S4(), 28); + code.SQSHLU(V11.D2(), V3.D2(), 4); + code.SQSHLU(V12.S4(), V0.S4(), 1); + code.SQSHLU(V13.S4(), V1.S4(), 3); + code.SQSHLU(V14.S4(), V2.S4(), 0); + code.SQSHLU(V15.S4(), V3.S4(), 0); + + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffffffffffff, 0x96dc5c140705cd04}); + + env.ticks_left = env.code_mem.size(); + CheckedRun([&]() { jit.Run(); }); + + CHECK(jit.GetVector(8) == Vector{0x3000d4d4, 0xfe0000000076009e}); + CHECK(jit.GetVector(9) == Vector{0x2c0000003c, 0}); + CHECK(jit.GetVector(10) == Vector{0x10000000'ffffffff, 0xffffffff'ffffffff}); + CHECK(jit.GetVector(11) == Vector{0, 0}); + CHECK(jit.GetVector(12) == Vector{0x3174d4d4, 0xfffffffe00000000}); + CHECK(jit.GetVector(13) == Vector{0x5800000078, 0}); + CHECK(jit.GetVector(14) == Vector{0x1000000ff, 0x100000007f}); + CHECK(jit.GetVector(15) == Vector{0, 0x705cd04}); +} + +TEST_CASE("A64: SMIN", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SMIN(V8.B16(), V0.B16(), V3.B16()); + code.SMIN(V9.H8(), V1.H8(), V2.H8()); + code.SMIN(V10.S4(), V2.S4(), V3.S4()); + code.SMIN(V11.S4(), V3.S4(), V3.S4()); + code.SMIN(V12.S4(), V0.S4(), V3.S4()); + code.SMIN(V13.S4(), V1.S4(), V2.S4()); + code.SMIN(V14.S4(), V2.S4(), V1.S4()); + code.SMIN(V15.S4(), V3.S4(), V0.S4()); + + jit.SetPC(0); + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffff'ffffffff, 0x96dc5c14'0705cd04}); + + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + + REQUIRE(jit.GetVector(8) == Vector{0xffffffffffbaffff, 0x96dcffff94059504}); + REQUIRE(jit.GetVector(9) == Vector{0x10000000f, 0xffffffffffffffff}); + REQUIRE(jit.GetVector(10) == Vector{0xffffffffffffffff, 0x96dc5c140000007f}); +} + +TEST_CASE("A64: SMINP", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SMINP(V8.B16(), V0.B16(), V3.B16()); + code.SMINP(V9.H8(), V1.H8(), V2.H8()); + code.SMINP(V10.S4(), V2.S4(), V1.S4()); + code.SMINP(V11.S4(), V3.S4(), V3.S4()); + code.SMINP(V12.S4(), V0.S4(), V3.S4()); + code.SMINP(V13.S4(), V1.S4(), V2.S4()); + code.SMINP(V14.S4(), V2.S4(), V1.S4()); + code.SMINP(V15.S4(), V3.S4(), V0.S4()); + + jit.SetPC(0); + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffff'ffffffff, 0x96dc5c14'0705cd04}); + + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + + REQUIRE(jit.GetVector(8) == Vector{0xffff9495ffffba6a, 0x961405cdffffffff}); + REQUIRE(jit.GetVector(9) == Vector{0xffffffff00000000, 0}); + REQUIRE(jit.GetVector(10) == Vector{0x1000000001, 0xffffffff0000000b}); + REQUIRE(jit.GetVector(11) == Vector{0x96dc5c14ffffffff, 0x96dc5c14ffffffff}); + REQUIRE(jit.GetVector(12) == Vector{0x943b954fffffffff, 0x96dc5c14ffffffff}); + REQUIRE(jit.GetVector(13) == Vector{0xffffffff0000000b, 0x1000000001}); + REQUIRE(jit.GetVector(14) == Vector{0x1000000001, 0xffffffff0000000b}); + REQUIRE(jit.GetVector(15) == Vector{0x96dc5c14ffffffff, 0x943b954fffffffff}); +} + TEST_CASE("A64: XTN", "[a64]") { A64TestEnv env; A64::UserConfig jit_user_config{}; diff --git a/src/dynarmic/tests/A64/fp_min_max.cpp b/src/dynarmic/tests/A64/fp_min_max.cpp index 1669b63071..313b5e5117 100644 --- a/src/dynarmic/tests/A64/fp_min_max.cpp +++ b/src/dynarmic/tests/A64/fp_min_max.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -11,8 +11,8 @@ #include #include "dynarmic/common/common_types.h" -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A64/testenv.h" +#include "dynarmic/tests/native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp index 2cdf288eab..749cc77126 100644 --- a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp +++ b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp @@ -12,14 +12,13 @@ #include #include -#include #include "dynarmic/common/common_types.h" -#include "../fuzz_util.h" -#include "../rand_int.h" -#include "../unicorn_emu/a64_unicorn.h" -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/fuzz_util.h" +#include "dynarmic/tests/rand_int.h" +#include "dynarmic/tests/unicorn_emu/a64_unicorn.h" +#include "dynarmic/tests/A64/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" @@ -168,7 +167,7 @@ static Dynarmic::A64::UserConfig GetUserConfig(A64TestEnv& jit_env) { return jit_user_config; } -static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv& jit_env, A64TestEnv& uni_env, const A64Unicorn::RegisterArray& regs, const A64Unicorn::VectorArray& vecs, const size_t instructions_start, const std::vector& instructions, const u32 pstate, const u32 fpcr) { +static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv& jit_env, A64TestEnv& uni_env, Dynarmic::A64::UserConfig& conf, const A64Unicorn::RegisterArray& regs, const A64Unicorn::VectorArray& vecs, const size_t instructions_start, const std::vector& instructions, const u32 pstate, const u32 fpcr) { jit_env.code_mem = instructions; uni_env.code_mem = instructions; jit_env.code_mem.emplace_back(0x14000000); // B . @@ -205,85 +204,82 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv uni_env.ticks_left = instructions.size() * 4; uni.Run(); - SCOPE_FAIL { - fmt::print("Instruction Listing:\n"); - for (u32 instruction : instructions) { - fmt::print("{:08x} {}\n", instruction, Common::DisassembleAArch64(instruction)); - } - fmt::print("\n"); + // SCOPE_FAIL { + // fmt::print("Instruction Listing:\n"); + // for (u32 instruction : instructions) { + // fmt::print("{:08x} {}\n", instruction, Common::DisassembleAArch64(instruction)); + // } + // fmt::print("\n"); - fmt::print("Initial register listing:\n"); - for (size_t i = 0; i < regs.size(); ++i) { - fmt::print("{:3s}: {:016x}\n", A64::RegToString(static_cast(i)), regs[i]); - } - for (size_t i = 0; i < vecs.size(); ++i) { - fmt::print("{:3s}: {:016x}{:016x}\n", A64::VecToString(static_cast(i)), vecs[i][1], vecs[i][0]); - } - fmt::print("sp : {:016x}\n", initial_sp); - fmt::print("pc : {:016x}\n", instructions_start); - fmt::print("p : {:08x}\n", pstate); - fmt::print("fpcr {:08x}\n", fpcr); - fmt::print("fpcr.AHP {}\n", FP::FPCR{fpcr}.AHP()); - fmt::print("fpcr.DN {}\n", FP::FPCR{fpcr}.DN()); - fmt::print("fpcr.FZ {}\n", FP::FPCR{fpcr}.FZ()); - fmt::print("fpcr.RMode {}\n", static_cast(FP::FPCR{fpcr}.RMode())); - fmt::print("fpcr.FZ16 {}\n", FP::FPCR{fpcr}.FZ16()); - fmt::print("\n"); + // fmt::print("Initial register listing:\n"); + // for (size_t i = 0; i < regs.size(); ++i) { + // fmt::print("{:3s}: {:016x}\n", A64::RegToString(static_cast(i)), regs[i]); + // } + // for (size_t i = 0; i < vecs.size(); ++i) { + // fmt::print("{:3s}: {:016x}{:016x}\n", A64::VecToString(static_cast(i)), vecs[i][1], vecs[i][0]); + // } + // fmt::print("sp : {:016x}\n", initial_sp); + // fmt::print("pc : {:016x}\n", instructions_start); + // fmt::print("p : {:08x}\n", pstate); + // fmt::print("fpcr {:08x}\n", fpcr); + // fmt::print("fpcr.AHP {}\n", FP::FPCR{fpcr}.AHP()); + // fmt::print("fpcr.DN {}\n", FP::FPCR{fpcr}.DN()); + // fmt::print("fpcr.FZ {}\n", FP::FPCR{fpcr}.FZ()); + // fmt::print("fpcr.RMode {}\n", static_cast(FP::FPCR{fpcr}.RMode())); + // fmt::print("fpcr.FZ16 {}\n", FP::FPCR{fpcr}.FZ16()); + // fmt::print("\n"); - fmt::print("Final register listing:\n"); - fmt::print(" unicorn dynarmic\n"); - const auto uni_regs = uni.GetRegisters(); - for (size_t i = 0; i < regs.size(); ++i) { - fmt::print("{:3s}: {:016x} {:016x} {}\n", A64::RegToString(static_cast(i)), uni_regs[i], jit.GetRegisters()[i], uni_regs[i] != jit.GetRegisters()[i] ? "*" : ""); - } - const auto uni_vecs = uni.GetVectors(); - for (size_t i = 0; i < vecs.size(); ++i) { - fmt::print("{:3s}: {:016x}{:016x} {:016x}{:016x} {}\n", A64::VecToString(static_cast(i)), - uni_vecs[i][1], uni_vecs[i][0], - jit.GetVectors()[i][1], jit.GetVectors()[i][0], - uni_vecs[i] != jit.GetVectors()[i] ? "*" : ""); - } - fmt::print("sp : {:016x} {:016x} {}\n", uni.GetSP(), jit.GetSP(), uni.GetSP() != jit.GetSP() ? "*" : ""); - fmt::print("pc : {:016x} {:016x} {}\n", uni.GetPC(), jit.GetPC(), uni.GetPC() != jit.GetPC() ? "*" : ""); - fmt::print("p : {:08x} {:08x} {}\n", uni.GetPstate(), jit.GetPstate(), (uni.GetPstate() & 0xF0000000) != (jit.GetPstate() & 0xF0000000) ? "*" : ""); - fmt::print("qc : {:08x} {:08x} {}\n", uni.GetFpsr(), jit.GetFpsr(), FP::FPSR{uni.GetFpsr()}.QC() != FP::FPSR{jit.GetFpsr()}.QC() ? "*" : ""); - fmt::print("\n"); + // fmt::print("Final register listing:\n"); + // fmt::print(" unicorn dynarmic\n"); + // const auto uni_regs = uni.GetRegisters(); + // for (size_t i = 0; i < regs.size(); ++i) { + // fmt::print("{:3s}: {:016x} {:016x} {}\n", A64::RegToString(static_cast(i)), uni_regs[i], jit.GetRegisters()[i], uni_regs[i] != jit.GetRegisters()[i] ? "*" : ""); + // } + // const auto uni_vecs = uni.GetVectors(); + // for (size_t i = 0; i < vecs.size(); ++i) { + // fmt::print("{:3s}: {:016x}{:016x} {:016x}{:016x} {}\n", A64::VecToString(static_cast(i)), + // uni_vecs[i][1], uni_vecs[i][0], + // jit.GetVectors()[i][1], jit.GetVectors()[i][0], + // uni_vecs[i] != jit.GetVectors()[i] ? "*" : ""); + // } + // fmt::print("sp : {:016x} {:016x} {}\n", uni.GetSP(), jit.GetSP(), uni.GetSP() != jit.GetSP() ? "*" : ""); + // fmt::print("pc : {:016x} {:016x} {}\n", uni.GetPC(), jit.GetPC(), uni.GetPC() != jit.GetPC() ? "*" : ""); + // fmt::print("p : {:08x} {:08x} {}\n", uni.GetPstate(), jit.GetPstate(), (uni.GetPstate() & 0xF0000000) != (jit.GetPstate() & 0xF0000000) ? "*" : ""); + // fmt::print("qc : {:08x} {:08x} {}\n", uni.GetFpsr(), jit.GetFpsr(), FP::FPSR{uni.GetFpsr()}.QC() != FP::FPSR{jit.GetFpsr()}.QC() ? "*" : ""); + // fmt::print("\n"); - fmt::print("Modified memory:\n"); - fmt::print(" uni dyn\n"); - auto uni_iter = uni_env.modified_memory.begin(); - auto jit_iter = jit_env.modified_memory.begin(); - while (uni_iter != uni_env.modified_memory.end() || jit_iter != jit_env.modified_memory.end()) { - if (uni_iter == uni_env.modified_memory.end() || (jit_iter != jit_env.modified_memory.end() && uni_iter->first > jit_iter->first)) { - fmt::print("{:016x}: {:02x} *\n", jit_iter->first, jit_iter->second); - jit_iter++; - } else if (jit_iter == jit_env.modified_memory.end() || jit_iter->first > uni_iter->first) { - fmt::print("{:016x}: {:02x} *\n", uni_iter->first, uni_iter->second); - uni_iter++; - } else if (uni_iter->first == jit_iter->first) { - fmt::print("{:016x}: {:02x} {:02x} {}\n", uni_iter->first, uni_iter->second, jit_iter->second, uni_iter->second != jit_iter->second ? "*" : ""); - uni_iter++; - jit_iter++; - } - } - fmt::print("\n"); + // fmt::print("Modified memory:\n"); + // fmt::print(" uni dyn\n"); + // auto uni_iter = uni_env.modified_memory.begin(); + // auto jit_iter = jit_env.modified_memory.begin(); + // while (uni_iter != uni_env.modified_memory.end() || jit_iter != jit_env.modified_memory.end()) { + // if (uni_iter == uni_env.modified_memory.end() || (jit_iter != jit_env.modified_memory.end() && uni_iter->first > jit_iter->first)) { + // fmt::print("{:016x}: {:02x} *\n", jit_iter->first, jit_iter->second); + // jit_iter++; + // } else if (jit_iter == jit_env.modified_memory.end() || jit_iter->first > uni_iter->first) { + // fmt::print("{:016x}: {:02x} *\n", uni_iter->first, uni_iter->second); + // uni_iter++; + // } else if (uni_iter->first == jit_iter->first) { + // fmt::print("{:016x}: {:02x} {:02x} {}\n", uni_iter->first, uni_iter->second, jit_iter->second, uni_iter->second != jit_iter->second ? "*" : ""); + // uni_iter++; + // jit_iter++; + // } + // } + // fmt::print("\n"); - const auto get_code = [&jit_env](u64 vaddr) { return jit_env.MemoryReadCode(vaddr); }; - IR::Block ir_block = A64::Translate({instructions_start, FP::FPCR{fpcr}}, get_code, {}); - fmt::print("IR:\n"); - fmt::print("{}\n", IR::DumpBlock(ir_block)); - Optimization::Optimize(ir_block, conf, {}); - fmt::print("Optimized IR:\n"); - fmt::print("{}\n", IR::DumpBlock(ir_block)); - - fmt::print("x86_64:\n"); - fmt::print("{}", jit.Disassemble()); - - fmt::print("Interrupts:\n"); - for (auto& i : uni_env.interrupts) { - puts(i.c_str()); - } - }; + // const auto get_code = [&jit_env](u64 vaddr) { return jit_env.MemoryReadCode(vaddr); }; + // const A64::LocationDescriptor location{instructions_start, FP::FPCR{fpcr}}; + // IR::Block ir_block{location}; + // A64::Translate(ir_block, location, get_code, {}); + // fmt::print("IR:\n{}\n", IR::DumpBlock(ir_block)); + // Optimization::Optimize(ir_block, conf, {}); + // fmt::print("Optimized IR:\n{}\n", IR::DumpBlock(ir_block)); + // fmt::print("x86_64:\n{}", jit.Disassemble()); + // fmt::print("Interrupts:\n"); + // for (auto& i : uni_env.interrupts) { + // puts(i.c_str()); + // } + // }; REQUIRE(uni_env.code_mem_modified_by_guest == jit_env.code_mem_modified_by_guest); if (uni_env.code_mem_modified_by_guest) { @@ -304,7 +300,8 @@ TEST_CASE("A64: Single random instruction", "[a64][unicorn]") { A64TestEnv jit_env{}; A64TestEnv uni_env{}; - Dynarmic::A64::Jit jit{GetUserConfig(jit_env)}; + Dynarmic::A64::UserConfig conf{GetUserConfig(jit_env)}; + Dynarmic::A64::Jit jit{conf}; A64Unicorn uni{uni_env}; A64Unicorn::RegisterArray regs; @@ -323,7 +320,7 @@ TEST_CASE("A64: Single random instruction", "[a64][unicorn]") { INFO("Instruction: 0x" << std::hex << instructions[0]); - RunTestInstance(jit, uni, jit_env, uni_env, regs, vecs, start_address, instructions, pstate, fpcr); + RunTestInstance(jit, uni, jit_env, uni_env, conf, regs, vecs, start_address, instructions, pstate, fpcr); } } @@ -331,7 +328,8 @@ TEST_CASE("A64: Floating point instructions", "[a64][unicorn]") { A64TestEnv jit_env{}; A64TestEnv uni_env{}; - Dynarmic::A64::Jit jit{GetUserConfig(jit_env)}; + Dynarmic::A64::UserConfig conf{GetUserConfig(jit_env)}; + Dynarmic::A64::Jit jit{conf}; A64Unicorn uni{uni_env}; static constexpr std::array float_numbers{ @@ -448,7 +446,7 @@ TEST_CASE("A64: Floating point instructions", "[a64][unicorn]") { INFO("Instruction: 0x" << std::hex << instructions[0]); - RunTestInstance(jit, uni, jit_env, uni_env, regs, vecs, start_address, instructions, pstate, fpcr); + RunTestInstance(jit, uni, jit_env, uni_env, conf, regs, vecs, start_address, instructions, pstate, fpcr); } } @@ -456,7 +454,8 @@ TEST_CASE("A64: Small random block", "[a64][unicorn]") { A64TestEnv jit_env{}; A64TestEnv uni_env{}; - Dynarmic::A64::Jit jit{GetUserConfig(jit_env)}; + Dynarmic::A64::UserConfig conf{GetUserConfig(jit_env)}; + Dynarmic::A64::Jit jit{conf}; A64Unicorn uni{uni_env}; A64Unicorn::RegisterArray regs; @@ -483,7 +482,7 @@ TEST_CASE("A64: Small random block", "[a64][unicorn]") { INFO("Instruction 4: 0x" << std::hex << instructions[3]); INFO("Instruction 5: 0x" << std::hex << instructions[4]); - RunTestInstance(jit, uni, jit_env, uni_env, regs, vecs, start_address, instructions, pstate, fpcr); + RunTestInstance(jit, uni, jit_env, uni_env, conf, regs, vecs, start_address, instructions, pstate, fpcr); } } @@ -491,7 +490,8 @@ TEST_CASE("A64: Large random block", "[a64][unicorn]") { A64TestEnv jit_env{}; A64TestEnv uni_env{}; - Dynarmic::A64::Jit jit{GetUserConfig(jit_env)}; + Dynarmic::A64::UserConfig conf{GetUserConfig(jit_env)}; + Dynarmic::A64::Jit jit{conf}; A64Unicorn uni{uni_env}; A64Unicorn::RegisterArray regs; @@ -512,6 +512,6 @@ TEST_CASE("A64: Large random block", "[a64][unicorn]") { const u32 pstate = RandInt(0, 0xF) << 28; const u32 fpcr = RandomFpcr(); - RunTestInstance(jit, uni, jit_env, uni_env, regs, vecs, start_address, instructions, pstate, fpcr); + RunTestInstance(jit, uni, jit_env, uni_env, conf, regs, vecs, start_address, instructions, pstate, fpcr); } } diff --git a/src/dynarmic/tests/A64/misaligned_page_table.cpp b/src/dynarmic/tests/A64/misaligned_page_table.cpp index fc0bc77428..99afba519d 100644 --- a/src/dynarmic/tests/A64/misaligned_page_table.cpp +++ b/src/dynarmic/tests/A64/misaligned_page_table.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,8 +8,8 @@ #include -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A64/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/interface/A64/a64.h" TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") { diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp index a083f16d61..c15f307c3d 100644 --- a/src/dynarmic/tests/A64/real_world.cpp +++ b/src/dynarmic/tests/A64/real_world.cpp @@ -1,11 +1,11 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include #include -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A64/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/test_invalidation.cpp b/src/dynarmic/tests/A64/test_invalidation.cpp index 0c92f5f606..3b2d5fef98 100644 --- a/src/dynarmic/tests/A64/test_invalidation.cpp +++ b/src/dynarmic/tests/A64/test_invalidation.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -8,8 +8,8 @@ #include -#include "./testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A64/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/verify_unicorn.cpp b/src/dynarmic/tests/A64/verify_unicorn.cpp index 0c0ccc1609..5bdcd0dc3e 100644 --- a/src/dynarmic/tests/A64/verify_unicorn.cpp +++ b/src/dynarmic/tests/A64/verify_unicorn.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD @@ -7,9 +10,9 @@ #include -#include "../rand_int.h" -#include "../unicorn_emu/a64_unicorn.h" -#include "./testenv.h" +#include "dynarmic/tests/rand_int.h" +#include "dynarmic/tests/unicorn_emu/a64_unicorn.h" +#include "dynarmic/tests/A64/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 395181efe3..ca44dce593 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,7 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl unordered_dense::unordered_dense) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt unordered_dense::unordered_dense) if (BOOST_NO_HEADERS) target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) else() @@ -93,7 +93,7 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl unordered_dense::unordered_dense) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt unordered_dense::unordered_dense) if (BOOST_NO_HEADERS) target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) else() @@ -110,7 +110,7 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl unordered_dense::unordered_dense) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt unordered_dense::unordered_dense) if (BOOST_NO_HEADERS) target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) else() @@ -123,7 +123,7 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl unordered_dense::unordered_dense) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt unordered_dense::unordered_dense) if (BOOST_NO_HEADERS) target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) else() diff --git a/src/dynarmic/tests/fp/FPToFixed.cpp b/src/dynarmic/tests/fp/FPToFixed.cpp index 570ebcbbd7..e16e4460ed 100644 --- a/src/dynarmic/tests/fp/FPToFixed.cpp +++ b/src/dynarmic/tests/fp/FPToFixed.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -12,7 +12,7 @@ #include #include "dynarmic/common/common_types.h" -#include "../rand_int.h" +#include "dynarmic/tests/rand_int.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/op.h" diff --git a/src/dynarmic/tests/fp/mantissa_util_tests.cpp b/src/dynarmic/tests/fp/mantissa_util_tests.cpp index de29d51865..9d16c3624c 100644 --- a/src/dynarmic/tests/fp/mantissa_util_tests.cpp +++ b/src/dynarmic/tests/fp/mantissa_util_tests.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -12,7 +12,7 @@ #include #include "dynarmic/common/common_types.h" -#include "../rand_int.h" +#include "dynarmic/tests/rand_int.h" #include "dynarmic/common/fp/mantissa_util.h" #include "dynarmic/common/safe_ops.h" diff --git a/src/dynarmic/tests/fp/unpacked_tests.cpp b/src/dynarmic/tests/fp/unpacked_tests.cpp index 919f21bf2f..a4f10d1273 100644 --- a/src/dynarmic/tests/fp/unpacked_tests.cpp +++ b/src/dynarmic/tests/fp/unpacked_tests.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -12,7 +12,7 @@ #include #include "dynarmic/common/common_types.h" -#include "../rand_int.h" +#include "dynarmic/tests/rand_int.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/fp/unpacked.h" diff --git a/src/dynarmic/tests/fuzz_util.cpp b/src/dynarmic/tests/fuzz_util.cpp index 351eb1e10f..05f0a9e865 100644 --- a/src/dynarmic/tests/fuzz_util.cpp +++ b/src/dynarmic/tests/fuzz_util.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -6,7 +6,7 @@ * SPDX-License-Identifier: 0BSD */ -#include "./fuzz_util.h" +#include "dynarmic/tests/fuzz_util.h" #include @@ -14,7 +14,7 @@ #include #include "dynarmic/common/assert.h" -#include "./rand_int.h" +#include "dynarmic/tests/rand_int.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/rounding_mode.h" diff --git a/src/dynarmic/tests/native/preserve_xmm.cpp b/src/dynarmic/tests/native/preserve_xmm.cpp index 7421252063..6369e1a74d 100644 --- a/src/dynarmic/tests/native/preserve_xmm.cpp +++ b/src/dynarmic/tests/native/preserve_xmm.cpp @@ -1,12 +1,12 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include #include #include -#include "../A64/testenv.h" -#include "../native/testenv.h" +#include "dynarmic/tests/A64/testenv.h" +#include "dynarmic/tests/native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 33d84ba8cd..19e2ca38b2 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/test_generator.cpp b/src/dynarmic/tests/test_generator.cpp index 4435e762ae..43203c3e13 100644 --- a/src/dynarmic/tests/test_generator.cpp +++ b/src/dynarmic/tests/test_generator.cpp @@ -16,14 +16,13 @@ #include #include -#include -#include +#include "dynarmic/mcl/bit.hpp" #include "dynarmic/common/common_types.h" #include "./A32/testenv.h" #include "./A64/testenv.h" -#include "./fuzz_util.h" -#include "./rand_int.h" +#include "dynarmic/tests/fuzz_util.h" +#include "dynarmic/tests/rand_int.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/unicorn_emu/a32_unicorn.cpp b/src/dynarmic/tests/unicorn_emu/a32_unicorn.cpp index 1a4f1845d5..9a17bc5582 100644 --- a/src/dynarmic/tests/unicorn_emu/a32_unicorn.cpp +++ b/src/dynarmic/tests/unicorn_emu/a32_unicorn.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -6,22 +6,14 @@ * SPDX-License-Identifier: 0BSD */ -#include "./a32_unicorn.h" - #include - +#include +#include "dynarmic/mcl/bit.hpp" +#include "dynarmic/tests/unicorn_emu/a32_unicorn.h" #include "dynarmic/common/assert.h" -#include +#include "dynarmic/tests/A32/testenv.h" -#include "../A32/testenv.h" - -#define CHECKED(expr) \ - do { \ - if (auto cerr_ = (expr)) { \ - ASSERT(false && "Call " #expr " failed with error: {} ({})\n", static_cast(cerr_), \ - uc_strerror(cerr_)); \ - } \ - } while (0) +#define CHECKED(expr) do if ((expr)) ASSERT(false && "Call " #expr " failed with error\n"); while (0) constexpr u32 BEGIN_ADDRESS = 0; constexpr u32 END_ADDRESS = ~u32(0); diff --git a/src/dynarmic/tests/unicorn_emu/a32_unicorn.h b/src/dynarmic/tests/unicorn_emu/a32_unicorn.h index 79831b8111..d94724d9f2 100644 --- a/src/dynarmic/tests/unicorn_emu/a32_unicorn.h +++ b/src/dynarmic/tests/unicorn_emu/a32_unicorn.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -21,7 +21,7 @@ #include "dynarmic/common/common_types.h" -#include "../A32/testenv.h" +#include "dynarmic/tests/A32/testenv.h" namespace Unicorn::A32 { static constexpr size_t num_gprs = 16; diff --git a/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp b/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp index 3f13377f71..c8aa404199 100644 --- a/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp +++ b/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -6,17 +6,11 @@ * SPDX-License-Identifier: 0BSD */ -#include "./a64_unicorn.h" - +#include +#include "dynarmic/tests/unicorn_emu/a64_unicorn.h" #include "dynarmic/common/assert.h" -#define CHECKED(expr) \ - do { \ - if (auto cerr_ = (expr)) { \ - ASSERT(false && "Call " #expr " failed with error: {} ({})\n", static_cast(cerr_), \ - uc_strerror(cerr_)); \ - } \ - } while (0) +#define CHECKED(expr) do if ((expr)) ASSERT(false && "Call " #expr " failed with error\n"); while (0) constexpr u64 BEGIN_ADDRESS = 0; constexpr u64 END_ADDRESS = ~u64(0); @@ -172,7 +166,7 @@ void A64Unicorn::DumpMemoryInformation() { void A64Unicorn::InterruptHook(uc_engine* uc, u32 int_number, void* user_data) { auto* this_ = static_cast(user_data); - u32 esr; + u32 esr = 0; //CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR_EL0, &esr)); auto ec = esr >> 26; diff --git a/src/dynarmic/tests/unicorn_emu/a64_unicorn.h b/src/dynarmic/tests/unicorn_emu/a64_unicorn.h index 54f09c3b28..1bc5b1cb8e 100644 --- a/src/dynarmic/tests/unicorn_emu/a64_unicorn.h +++ b/src/dynarmic/tests/unicorn_emu/a64_unicorn.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -21,7 +21,7 @@ #include "dynarmic/common/common_types.h" -#include "../A64/testenv.h" +#include "dynarmic/tests/A64/testenv.h" class A64Unicorn final { public: diff --git a/src/frontend_common/settings_generator.cpp b/src/frontend_common/settings_generator.cpp index 46625656b1..2b1a0f35d7 100644 --- a/src/frontend_common/settings_generator.cpp +++ b/src/frontend_common/settings_generator.cpp @@ -4,22 +4,21 @@ #include #include #include "common/settings.h" +#include "common/random.h" #include "settings_generator.h" namespace FrontendCommon { void GenerateSettings() { - static std::random_device rd; - + auto gen = Common::Random::GetMT19937(); // Web Token if (Settings::values.eden_token.GetValue().empty()) { static constexpr const size_t token_length = 48; static constexpr const frozen::string token_set = "abcdefghijklmnopqrstuvwxyz"; static std::uniform_int_distribution token_dist(0, token_set.size() - 1); std::string result; - for (size_t i = 0; i < token_length; ++i) { - size_t idx = token_dist(rd); + size_t idx = token_dist(gen); result += token_set[idx]; } Settings::values.eden_token.SetValue(result); @@ -27,8 +26,6 @@ void GenerateSettings() { // Randomly generated number because, well, we fill the rest automagically ;) // Other serial parts are filled by Region_Index - std::random_device device; - std::mt19937 gen(device()); std::uniform_int_distribution distribution(1, (std::numeric_limits::max)()); if (Settings::values.serial_unit.GetValue() == 0) Settings::values.serial_unit.SetValue(distribution(gen)); diff --git a/src/hid_core/CMakeLists.txt b/src/hid_core/CMakeLists.txt index 26d9ec1d3f..c740cdbe7f 100644 --- a/src/hid_core/CMakeLists.txt +++ b/src/hid_core/CMakeLists.txt @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + # SPDX-FileCopyrightText: 2018 yuzu Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later @@ -148,13 +151,10 @@ if (MSVC) ) else() target_compile_options(hid_core PRIVATE - -Werror=conversion - - -Wno-sign-conversion - -Wno-cast-function-type - - $<$:-fsized-deallocation> - ) + $<$:-Werror=conversion> + $<$:-Wno-sign-conversion> + $<$:-Wno-cast-function-type> + $<$:-fsized-deallocation>) endif() create_target_directory_groups(hid_core) diff --git a/src/hid_core/frontend/input_converter.cpp b/src/hid_core/frontend/input_converter.cpp index f245a3f769..0709f5c930 100644 --- a/src/hid_core/frontend/input_converter.cpp +++ b/src/hid_core/frontend/input_converter.cpp @@ -1,9 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include +#include "common/random.h" #include "common/input.h" #include "hid_core/frontend/input_converter.h" @@ -119,15 +123,14 @@ Common::Input::MotionStatus TransformToMotion(const Common::Input::CallbackStatu .properties = properties, }; if (TransformToButton(callback).value) { - std::random_device device; - std::mt19937 gen(device()); std::uniform_int_distribution distribution(-5000, 5000); - status.accel.x.raw_value = static_cast(distribution(gen)) * 0.001f; - status.accel.y.raw_value = static_cast(distribution(gen)) * 0.001f; - status.accel.z.raw_value = static_cast(distribution(gen)) * 0.001f; - status.gyro.x.raw_value = static_cast(distribution(gen)) * 0.001f; - status.gyro.y.raw_value = static_cast(distribution(gen)) * 0.001f; - status.gyro.z.raw_value = static_cast(distribution(gen)) * 0.001f; + auto gen = Common::Random::GetMT19937(); + status.accel.x.raw_value = f32(distribution(gen)) * 0.001f; + status.accel.y.raw_value = f32(distribution(gen)) * 0.001f; + status.accel.z.raw_value = f32(distribution(gen)) * 0.001f; + status.gyro.x.raw_value = f32(distribution(gen)) * 0.001f; + status.gyro.y.raw_value = f32(distribution(gen)) * 0.001f; + status.gyro.z.raw_value = f32(distribution(gen)) * 0.001f; } break; } diff --git a/src/hid_core/resources/npad/npad.cpp b/src/hid_core/resources/npad/npad.cpp index 9355a88c4b..d625617bd5 100644 --- a/src/hid_core/resources/npad/npad.cpp +++ b/src/hid_core/resources/npad/npad.cpp @@ -800,6 +800,10 @@ Result NPad::DisconnectNpad(u64 aruid, Core::HID::NpadIdType npad_id) { auto& controller = GetControllerFromNpadIdType(aruid, npad_id); auto* shared_memory = controller.shared_memory; + if (!shared_memory) { + LOG_WARNING(Service_HID, "DisconnectNpad: shared_memory is null for npad_id={}", npad_id); + return ResultSuccess; + } // Don't reset shared_memory->assignment_mode this value is persistent shared_memory->style_tag.raw = Core::HID::NpadStyleSet::None; // Zero out shared_memory->device_type.raw = 0; diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt index ef1308b1d0..406ee967b0 100644 --- a/src/input_common/CMakeLists.txt +++ b/src/input_common/CMakeLists.txt @@ -44,9 +44,7 @@ if (MSVC) /we4800 # Implicit conversion from 'type' to bool. Possible information loss ) else() - target_compile_options(input_common PRIVATE - -Werror=conversion - ) + target_compile_options(input_common PRIVATE $<$:-Werror=conversion>) endif() if (ANDROID) diff --git a/src/input_common/drivers/udp_client.cpp b/src/input_common/drivers/udp_client.cpp index c930e19de3..f8eac19a3a 100644 --- a/src/input_common/drivers/udp_client.cpp +++ b/src/input_common/drivers/udp_client.cpp @@ -11,6 +11,7 @@ #include "common/logging.h" #include "common/param_package.h" +#include "common/random.h" #include "common/settings.h" #include "input_common/drivers/udp_client.h" #include "input_common/helpers/udp_protocol.h" @@ -31,7 +32,7 @@ public: explicit Socket(const std::string& host, u16 port, SocketCallback callback_) : callback(std::move(callback_)), timer(io_context), - socket(io_context, udp::endpoint(udp::v4(), 0)), client_id(GenerateRandomClientId()) { + socket(io_context, udp::endpoint(udp::v4(), 0)), client_id(Common::Random::Random32(0)) { boost::system::error_code ec{}; auto ipv4 = boost::asio::ip::make_address_v4(host, ec); if (ec.value() != boost::system::errc::success) { @@ -64,11 +65,6 @@ public: } private: - u32 GenerateRandomClientId() const { - std::random_device device; - return device(); - } - void HandleReceive(const boost::system::error_code&, std::size_t bytes_transferred) { if (auto type = Response::Validate(receive_buffer.data(), bytes_transferred)) { switch (*type) { @@ -595,7 +591,7 @@ void TestCommunication(const std::string& host, u16 port, } CalibrationConfigurationJob::CalibrationConfigurationJob( - const std::string& host, u16 port, std::function status_callback, + const std::string& host, u16 port, std::function status_callback, std::function data_callback) { std::thread([=, this] { @@ -604,14 +600,14 @@ CalibrationConfigurationJob::CalibrationConfigurationJob( u16 max_x{}; u16 max_y{}; - Status current_status{Status::Initialized}; + CalibrationStatus current_status{CalibrationStatus::Initialized}; SocketCallback callback{[](Response::Version) {}, [](Response::PortInfo) {}, [&](Response::PadData data) { constexpr u16 CALIBRATION_THRESHOLD = 100; - if (current_status == Status::Initialized) { + if (current_status == CalibrationStatus::Initialized) { // Receiving data means the communication is ready now - current_status = Status::Ready; + current_status = CalibrationStatus::Ready; status_callback(current_status); } if (data.touch[0].is_active == 0) { @@ -621,9 +617,9 @@ CalibrationConfigurationJob::CalibrationConfigurationJob( data.touch[0].y); min_x = (std::min)(min_x, static_cast(data.touch[0].x)); min_y = (std::min)(min_y, static_cast(data.touch[0].y)); - if (current_status == Status::Ready) { + if (current_status == CalibrationStatus::Ready) { // First touch - min data (min_x/min_y) - current_status = Status::Stage1Completed; + current_status = CalibrationStatus::Stage1Completed; status_callback(current_status); } if (data.touch[0].x - min_x > CALIBRATION_THRESHOLD && @@ -632,7 +628,7 @@ CalibrationConfigurationJob::CalibrationConfigurationJob( // configuration max_x = data.touch[0].x; max_y = data.touch[0].y; - current_status = Status::Completed; + current_status = CalibrationStatus::Completed; data_callback(min_x, min_y, max_x, max_y); status_callback(current_status); diff --git a/src/input_common/drivers/udp_client.h b/src/input_common/drivers/udp_client.h index cea9f579ac..65095b43f4 100644 --- a/src/input_common/drivers/udp_client.h +++ b/src/input_common/drivers/udp_client.h @@ -163,7 +163,7 @@ private: /// An async job allowing configuration of the touchpad calibration. class CalibrationConfigurationJob { public: - enum class Status { + enum class CalibrationStatus { Initialized, Ready, Stage1Completed, @@ -176,7 +176,7 @@ public: * @param data_callback Called when calibration data is ready */ explicit CalibrationConfigurationJob(const std::string& host, u16 port, - std::function status_callback, + std::function status_callback, std::function data_callback); ~CalibrationConfigurationJob(); void Stop(); diff --git a/src/network/packet.h b/src/network/packet.h index 9aa2a2c9cf..428f7d6453 100644 --- a/src/network/packet.h +++ b/src/network/packet.h @@ -5,6 +5,7 @@ #include #include +#include #include "common/common_types.h" namespace Network { @@ -118,26 +119,21 @@ private: template Packet& Packet::Read(std::vector& out_data) { - // First extract the size u32 size = 0; Read(size); out_data.resize(size); - // Then extract the data - for (std::size_t i = 0; i < out_data.size(); ++i) { - T character; - Read(character); - out_data[i] = character; + for (auto& elem : out_data) { + Read(elem); } + return *this; } template Packet& Packet::Read(std::array& out_data) { - for (std::size_t i = 0; i < out_data.size(); ++i) { - T character; - Read(character); - out_data[i] = character; + for (auto& elem : out_data) { + Read(elem); } return *this; } diff --git a/src/network/room.cpp b/src/network/room.cpp index 7c257d2bd4..866ceb2fb8 100644 --- a/src/network/room.cpp +++ b/src/network/room.cpp @@ -23,8 +23,6 @@ namespace Network { class Room::RoomImpl { public: - std::mt19937 random_gen; ///< Random number generator. Used for GenerateFakeIPAddress - ENetHost* server = nullptr; ///< Network interface. std::atomic state{State::Closed}; ///< Current state of the room. @@ -51,7 +49,7 @@ public: IPBanList ip_ban_list; ///< List of banned IP addresses mutable std::mutex ban_list_mutex; ///< Mutex for the ban lists - RoomImpl() : random_gen(std::random_device()()) {} + RoomImpl() {} /// Thread that receives and dispatches network packets std::optional room_thread; @@ -834,7 +832,7 @@ void Room::RoomImpl::HandleProxyPacket(const ENetEvent* event) { in_packet.IgnoreBytes(sizeof(u8)); // Protocol - bool broadcast; + bool broadcast = false; in_packet.Read(broadcast); // Broadcast Packet out_packet; @@ -888,7 +886,7 @@ void Room::RoomImpl::HandleLdnPacket(const ENetEvent* event) { IPv4Address remote_ip; in_packet.Read(remote_ip); // Remote IP - bool broadcast; + bool broadcast = false; in_packet.Read(broadcast); // Broadcast Packet out_packet; diff --git a/src/qt_common/config/uisettings.cpp b/src/qt_common/config/uisettings.cpp index 0b668c4bb1..f4f5688781 100644 --- a/src/qt_common/config/uisettings.cpp +++ b/src/qt_common/config/uisettings.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: 2016 Citra Emulator Project @@ -25,6 +25,48 @@ namespace FS = Common::FS; namespace UISettings { +// This shouldn't have anything except static initializers (no functions). So +// QKeySequence(...).toString() is NOT ALLOWED HERE. +// This must be in alphabetical order according to action name as it must have the same order as +// UISetting::values.shortcuts, which is alphabetically ordered. +// clang-format off +const std::array default_hotkeys{{ + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Mute/Unmute")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+M"), std::string("Home+Dpad_Right"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Down")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("-"), std::string("Home+Dpad_Down"), Qt::ApplicationShortcut, true}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Up")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("="), std::string("Home+Dpad_Up"), Qt::ApplicationShortcut, true}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Capture Screenshot")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+P"), std::string("Screenshot"), Qt::WidgetWithChildrenShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change Adapting Filter")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F8"), std::string("Home+L"), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change Docked Mode")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F10"), std::string("Home+X"), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change GPU Mode")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F9"), std::string("Home+R"), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Configure")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+,"), std::string(""), Qt::WidgetWithChildrenShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Configure Current Game")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+."), std::string(""), Qt::WidgetWithChildrenShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Continue/Pause Emulation")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F4"), std::string("Home+Plus"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Exit Fullscreen")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Esc"), std::string(""), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Exit Eden")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+Q"), std::string("Home+Minus"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Fullscreen")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F11"), std::string("Home+B"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load File")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+O"), std::string(""), Qt::WidgetWithChildrenShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load/Remove Amiibo")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F2"), std::string("Home+A"), Qt::WidgetWithChildrenShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Browse Public Game Lobby")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+B"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Create Room")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+N"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Direct Connect to Room")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+C"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Leave Room")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+L"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Show Current Room")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+R"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Restart Emulation")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F6"), std::string("R+Plus+Minus"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Stop Emulation")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F5"), std::string("L+Plus+Minus"), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Record")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F7"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Reset")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F6"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Start/Stop")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F5"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Filter Bar")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F"), std::string(""), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Framerate Limit")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+U"), std::string("Home+Y"), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Turbo Speed")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+Z"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Slow Speed")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+X"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Mouse Panning")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F9"), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Renderdoc Capture")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string(""), std::string(""), Qt::ApplicationShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Status Bar")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+S"), std::string(""), Qt::WindowShortcut, false}}, + {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Performance Overlay")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+V"), std::string(""), Qt::WindowShortcut, false}}, +}}; +// clang-format on + const Themes themes{{ {"Default", "default"}, {"Default Colorful", "colorful"}, diff --git a/src/qt_common/config/uisettings.h b/src/qt_common/config/uisettings.h index c2a14858dd..4549a36345 100644 --- a/src/qt_common/config/uisettings.h +++ b/src/qt_common/config/uisettings.h @@ -249,47 +249,8 @@ u32 CalculateWidth(u32 height, Settings::AspectRatio ratio); void SaveWindowState(); void RestoreWindowState(std::unique_ptr& qtConfig); -// This shouldn't have anything except static initializers (no functions). So -// QKeySequence(...).toString() is NOT ALLOWED HERE. -// This must be in alphabetical order according to action name as it must have the same order as -// UISetting::values.shortcuts, which is alphabetically ordered. -// clang-format off -const std::array default_hotkeys{{ - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Mute/Unmute")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+M"), std::string("Home+Dpad_Right"), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Down")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("-"), std::string("Home+Dpad_Down"), Qt::ApplicationShortcut, true}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Audio Volume Up")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("="), std::string("Home+Dpad_Up"), Qt::ApplicationShortcut, true}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Capture Screenshot")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+P"), std::string("Screenshot"), Qt::WidgetWithChildrenShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change Adapting Filter")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F8"), std::string("Home+L"), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change Docked Mode")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F10"), std::string("Home+X"), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Change GPU Mode")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F9"), std::string("Home+R"), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Configure")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+,"), std::string(""), Qt::WidgetWithChildrenShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Configure Current Game")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+."), std::string(""), Qt::WidgetWithChildrenShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Continue/Pause Emulation")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F4"), std::string("Home+Plus"), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Exit Fullscreen")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Esc"), std::string(""), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Exit Eden")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+Q"), std::string("Home+Minus"), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Fullscreen")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F11"), std::string("Home+B"), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load File")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+O"), std::string(""), Qt::WidgetWithChildrenShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Load/Remove Amiibo")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F2"), std::string("Home+A"), Qt::WidgetWithChildrenShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Browse Public Game Lobby")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+B"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Create Room")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+N"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Direct Connect to Room")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+C"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Leave Room")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+L"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Show Current Room")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+R"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Restart Emulation")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F6"), std::string("R+Plus+Minus"), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Stop Emulation")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("F5"), std::string("L+Plus+Minus"), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Record")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F7"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Reset")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F6"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "TAS Start/Stop")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F5"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Filter Bar")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F"), std::string(""), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Framerate Limit")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+U"), std::string("Home+Y"), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Turbo Speed")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+Z"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Slow Speed")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+X"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Mouse Panning")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+F9"), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Renderdoc Capture")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string(""), std::string(""), Qt::ApplicationShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Status Bar")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+S"), std::string(""), Qt::WindowShortcut, false}}, - {QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Toggle Performance Overlay")).toStdString(), QStringLiteral(QT_TRANSLATE_NOOP("Hotkeys", "Main Window")).toStdString(), {std::string("Ctrl+V"), std::string(""), Qt::WindowShortcut, false}}, -}}; -// clang-format on +// sync with uisettings.cpp +extern const std::array default_hotkeys; } // namespace UISettings diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index c385951318..e91803c8c6 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project # SPDX-License-Identifier: GPL-3.0-or-later # SPDX-FileCopyrightText: 2018 yuzu Emulator Project @@ -253,8 +253,7 @@ if (MSVC) ) else() target_compile_options(shader_recompiler PRIVATE - -Werror=conversion - + $<$:-Werror=conversion> # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. # And this in turns limits the size of a std::array. $<$:-fbracket-depth=1024> diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index db11def7b2..378d2cd0ec 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -142,14 +142,12 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, const auto is_float = UniformDefinitions::IsFloat(member_ptr); const auto num_elements = UniformDefinitions::NumElements(member_ptr); - const std::array zero_vec{ - is_float ? ctx.Const(0.0f) : ctx.Const(0u), - is_float ? ctx.Const(0.0f) : ctx.Const(0u), - is_float ? ctx.Const(0.0f) : ctx.Const(0u), - is_float ? ctx.Const(0.0f) : ctx.Const(0u), - }; + auto const zero_const = is_float ? ctx.Const(0.0f) : ctx.Const(0u); + const std::array zero_vec{zero_const, zero_const, zero_const, zero_const}; const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu)); - const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements)); + const Id zero = num_elements > 1 + ? ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements)) + : zero_const; return ctx.OpSelect(result_type, cond, val, zero); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index 795e5a1c84..5a60c90194 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -23,28 +23,28 @@ enum class SurfaceLoadStoreType : u64 { _3D, }; -constexpr unsigned R = 1 << 0; -constexpr unsigned G = 1 << 1; -constexpr unsigned B = 1 << 2; -constexpr unsigned A = 1 << 3; +constexpr unsigned SHADER_R = 1 << 0; +constexpr unsigned SHADER_G = 1 << 1; +constexpr unsigned SHADER_B = 1 << 2; +constexpr unsigned SHADER_A = 1 << 3; constexpr std::array MASK{ 0U, // - R, // - G, // - R | G, // - B, // - R | B, // - G | B, // - R | G | B, // - A, // - R | A, // - G | A, // - R | G | A, // - B | A, // - R | B | A, // - G | B | A, // - R | G | B | A, // + SHADER_R, // + SHADER_G, // + SHADER_R | SHADER_G, // + SHADER_B, // + SHADER_R | SHADER_B, // + SHADER_G | SHADER_B, // + SHADER_R | SHADER_G | SHADER_B, // + SHADER_A, // + SHADER_R | SHADER_A, // + SHADER_G | SHADER_A, // + SHADER_R | SHADER_G | SHADER_A, // + SHADER_B | SHADER_A, // + SHADER_R | SHADER_B | SHADER_A, // + SHADER_G | SHADER_B | SHADER_A, // + SHADER_R | SHADER_G | SHADER_B | SHADER_A, // }; enum class SurfaceLoadStoreSize : u64 { diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 43745af429..7de0898b58 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project # SPDX-License-Identifier: GPL-3.0-or-later # SPDX-FileCopyrightText: 2018 yuzu Emulator Project @@ -32,8 +32,10 @@ add_test(NAME tests COMMAND tests) # needed for vma if (NOT MSVC) target_compile_options(tests PRIVATE - -Wno-conversion - -Wno-unused-variable - -Wno-unused-parameter - -Wno-missing-field-initializers) + $<$:-Wno-conversion> + $<$:-Wno-unused-variable> + $<$:-Wno-unused-parameter> + $<$:-Wno-missing-field-initializers>) endif() + +set_target_properties(tests PROPERTIES UNITY_BUILD OFF) diff --git a/src/tests/common/bit_field.cpp b/src/tests/common/bit_field.cpp index 2d23ade06e..6fdabd9b3f 100644 --- a/src/tests/common/bit_field.cpp +++ b/src/tests/common/bit_field.cpp @@ -4,9 +4,6 @@ // SPDX-FileCopyrightText: 2019 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#define VMA_IMPLEMENTATION -#include "video_core/vulkan_common/vma.h" - #include #include #include @@ -93,3 +90,6 @@ TEST_CASE("BitField", "[common]") { 0b11000111, }}); } + +#define VMA_IMPLEMENTATION +#include "video_core/vulkan_common/vma.h" diff --git a/src/tests/input_common/calibration_configuration_job.cpp b/src/tests/input_common/calibration_configuration_job.cpp index 6ded0dfc86..84ec4fd3b2 100644 --- a/src/tests/input_common/calibration_configuration_job.cpp +++ b/src/tests/input_common/calibration_configuration_job.cpp @@ -103,7 +103,7 @@ TEST_CASE("CalibrationConfigurationJob completed", "[input_common]") { .y = 200, }}); - InputCommon::CemuhookUDP::CalibrationConfigurationJob::Status status{}; + InputCommon::CemuhookUDP::CalibrationConfigurationJob::CalibrationStatus status{}; u16 min_x{}; u16 min_y{}; u16 max_x{}; @@ -111,10 +111,10 @@ TEST_CASE("CalibrationConfigurationJob completed", "[input_common]") { InputCommon::CemuhookUDP::CalibrationConfigurationJob job( server.GetHost(), server.GetPort(), [&status, - &complete_event](InputCommon::CemuhookUDP::CalibrationConfigurationJob::Status status_) { + &complete_event](InputCommon::CemuhookUDP::CalibrationConfigurationJob::CalibrationStatus status_) { status = status_; if (status == - InputCommon::CemuhookUDP::CalibrationConfigurationJob::Status::Completed) { + InputCommon::CemuhookUDP::CalibrationConfigurationJob::CalibrationStatus::Completed) { complete_event.Set(); } }, @@ -126,7 +126,7 @@ TEST_CASE("CalibrationConfigurationJob completed", "[input_common]") { }); complete_event.WaitUntil(std::chrono::system_clock::now() + std::chrono::seconds(10)); - REQUIRE(status == InputCommon::CemuhookUDP::CalibrationConfigurationJob::Status::Completed); + REQUIRE(status == InputCommon::CemuhookUDP::CalibrationConfigurationJob::CalibrationStatus::Completed); REQUIRE(min_x == 0); REQUIRE(min_y == 0); REQUIRE(max_x == 200); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 89b00d2ff9..205d02ca78 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -362,14 +362,14 @@ else() if (APPLE) # error: declaration shadows a typedef in 'interval_base_set' # error: implicit conversion loses integer precision: 'int' to 'boost::icl::bound_type' (aka 'unsigned char') - target_compile_options(video_core PRIVATE -Wno-shadow -Wno-unused-local-typedef) + target_compile_options(video_core PRIVATE + $<$:-Wno-shadow> + $<$:-Wno-unused-local-typedef>) else() - target_compile_options(video_core PRIVATE -Werror=conversion) + target_compile_options(video_core PRIVATE $<$:-Werror=conversion>) endif() - target_compile_options(video_core PRIVATE - -Wno-sign-conversion - ) + target_compile_options(video_core PRIVATE $<$:-Wno-sign-conversion>) # xbyak set_source_files_properties(macro/macro_jit_x64.cpp PROPERTIES COMPILE_OPTIONS "-Wno-conversion;-Wno-shadow") diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 668f939546..60b399ccba 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -118,8 +118,6 @@ if (NOT GLSLANG_ERROR STREQUAL "") set(QUIET_FLAG "") endif() -# Shader files must depend on their directory otherwise *BSD make will spontaneously combust -file(MAKE_DIRECTORY "${SHADER_DIR}") foreach(SOURCE_FILE IN ITEMS ${SHADER_FILES}) get_filename_component(FILENAME ${SOURCE_FILE} NAME) string(REPLACE "." "_" SHADER_NAME ${FILENAME}) @@ -135,7 +133,6 @@ foreach(SOURCE_FILE IN ITEMS ${SHADER_FILES}) ${SOURCE_FILE} DEPENDS ${INPUT_FILE} - ${SHADER_DIR} # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified ) set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) @@ -151,8 +148,6 @@ foreach(SOURCE_FILE IN ITEMS ${SHADER_FILES}) ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} --target-env ${SPIR_V_VERSION} MAIN_DEPENDENCY ${SOURCE_FILE} - DEPENDS - ${SHADER_DIR} ) set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) endif() @@ -172,7 +167,6 @@ foreach(FILEPATH IN ITEMS ${FIDELITYFX_FILES}) ${SOURCE_FILE} DEPENDS ${INPUT_FILE} - ${SHADER_DIR} # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified ) set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e989bf6b31..75fbcaa968 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -49,7 +49,7 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; -constexpr size_t MAX_IMAGE_ELEMENTS = 64; +constexpr size_t INLINE_IMAGE_ELEMENTS = 64; DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { DescriptorLayoutBuilder builder{device}; @@ -264,7 +264,11 @@ GraphicsPipeline::GraphicsPipeline( stage_infos[stage] = *info; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + num_image_elements += Shader::NumDescriptors(info->texture_buffer_descriptors); + num_image_elements += Shader::NumDescriptors(info->image_buffer_descriptors); num_textures += Shader::NumDescriptors(info->texture_descriptors); + num_image_elements += Shader::NumDescriptors(info->texture_descriptors); + num_image_elements += Shader::NumDescriptors(info->image_descriptors); } fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0]; auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { @@ -310,10 +314,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { - std::array views; - std::array samplers; - size_t sampler_index{}; - size_t view_index{}; + small_vector views; + small_vector samplers; + views.reserve(num_image_elements); + samplers.reserve(num_textures); texture_cache.SynchronizeGraphicsDescriptors(); @@ -358,11 +362,11 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - views[view_index++] = { + views.push_back({ .index = handle.first, .blacklist = blacklist, .id = {} - }; + }); } }}; if constexpr (Spec::has_texture_buffers) { @@ -378,10 +382,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - views[view_index++] = {handle.first}; + views.push_back({handle.first}); VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; - samplers[sampler_index++] = sampler; + samplers.push_back(sampler); } } if constexpr (Spec::has_images) { @@ -407,7 +411,9 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { config_stage(4); } - texture_cache.FillGraphicsImageViews(std::span(views.data(), view_index)); + ASSERT(views.size() == num_image_elements); + ASSERT(samplers.size() == num_textures); + texture_cache.FillGraphicsImageViews(std::span(views.data(), views.size())); VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { @@ -501,7 +507,8 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.any_buffer_uploaded = false; } texture_cache.UpdateRenderTargets(false); - texture_cache.CheckFeedbackLoop(views); + texture_cache.CheckFeedbackLoop(std::span{views.data(), + views.size()}); ConfigureDraw(rescaling, render_area); return true; @@ -987,7 +994,7 @@ void GraphicsPipeline::Validate() { num_images += Shader::NumDescriptors(info.texture_descriptors); num_images += Shader::NumDescriptors(info.image_descriptors); } - ASSERT(num_images <= MAX_IMAGE_ELEMENTS); + ASSERT(num_images == num_image_elements); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 34941d6e8d..1a41e50a36 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -159,6 +159,7 @@ private: std::array stage_infos; std::array enabled_uniform_buffer_masks{}; VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; + size_t num_image_elements{}; u32 num_textures{}; bool fragment_has_color0_output{}; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 947de6a80e..226619d8d6 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -270,8 +270,8 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; - upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, WRITE_BARRIER); + upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); upload_cmdbuf.End(); cmdbuf.End(); @@ -373,8 +373,8 @@ void Scheduler::EndRenderPass() } cmdbuf.EndRenderPass(); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, nullptr, nullptr, vk::Span(barriers.data(), num_images)); + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, nullptr, nullptr, vk::Span(barriers.data(), num_images)); }); state.renderpass = VkRenderPass{}; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 5d55cf551b..0e4c274d94 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -9,6 +9,10 @@ #include #include +#ifdef __ANDROID__ +#include +#endif + #include "common/logging.h" #include "common/settings.h" #include "common/settings_enums.h" @@ -170,6 +174,7 @@ bool Swapchain::AcquireNextImage() { break; } + const auto wait_with_frame_pacing = [this] { switch (Settings::values.frame_pacing_mode.GetValue()) { case Settings::FramePacingMode::Target_Auto: scheduler.Wait(resource_ticks[image_index]); @@ -187,6 +192,17 @@ bool Swapchain::AcquireNextImage() { scheduler.Wait(resource_ticks[image_index], 120.0); break; } + }; + +#ifdef __ANDROID__ + if (android_get_device_api_level() >= 30) { + scheduler.Wait(resource_ticks[image_index]); + } else { + wait_with_frame_pacing(); + } +#else + wait_with_frame_pacing(); +#endif resource_ticks[image_index] = scheduler.CurrentTick(); diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index b4a9661e13..e5b1a816c1 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-3.0-or-later set(CMAKE_AUTOMOC ON) -set(CMAKE_AUTORCC OFF) +set(CMAKE_AUTORCC ON) set(CMAKE_AUTOUIC ON) set(CMAKE_INCLUDE_CURRENT_DIR ON) @@ -269,10 +269,6 @@ file(GLOB COMPAT_LIST file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*) file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*) -file(GLOB QRC_COMPAT_LIST ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc) -file(GLOB_RECURSE QRC_ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*.qrc) -file(GLOB_RECURSE QRC_THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*.qrc) - if (ENABLE_UPDATE_CHECKER) target_compile_definitions(yuzu PUBLIC ENABLE_UPDATE_CHECKER) endif() @@ -345,45 +341,48 @@ if (ENABLE_QT_TRANSLATION) qt_add_translation(LANGUAGES_QM ${PROJECT_SOURCE_DIR}/dist/english_plurals/en.ts) # Build a QRC file from the QM file list - set(QRC_LANGUAGES ${CMAKE_CURRENT_BINARY_DIR}/languages.qrc) - file(WRITE ${QRC_LANGUAGES} "\n") + set(LANGUAGES_QRC ${CMAKE_CURRENT_BINARY_DIR}/languages.qrc) + file(WRITE ${LANGUAGES_QRC} "\n") foreach (QM ${LANGUAGES_QM}) get_filename_component(QM_FILE ${QM} NAME) - file(APPEND ${QRC_LANGUAGES} "${QM_FILE}\n") + file(APPEND ${LANGUAGES_QRC} "${QM_FILE}\n") endforeach (QM) - file(APPEND ${QRC_LANGUAGES} "") + file(APPEND ${LANGUAGES_QRC} "") + + # Add the QRC file to package in all QM files + qt_add_resources(LANGUAGES ${LANGUAGES_QRC}) else() set(LANGUAGES) endif() -target_sources(yuzu PRIVATE - ${COMPAT_LIST} - ${ICONS} - ${THEMES} - ${LANGUAGES} -) - -# Add the QRC file to package in all QM files -qt_add_resources( - ${QRC_COMPAT_LIST} - ${QRC_ICONS} - ${QRC_THEMES} - LANGUAGES ${QRC_LANGUAGES} - OUTPUT_TARGETS ${QRC_OUTPUT_TARGETS} -) -set_source_files_properties(${QRC_OUTPUT_TARGETS} PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON) -target_sources(yuzu PRIVATE ${QRC_OUTPUT_TARGETS}) +target_sources(yuzu + PRIVATE + ${COMPAT_LIST} + ${ICONS} + ${LANGUAGES} + ${THEMES}) +# TODO(crueter): Move Assets.car/icon/icns handling to its own Module. if (APPLE) - # Normal icns - set(MACOSX_ICON "${CMAKE_SOURCE_DIR}/dist/eden.icns") - set_source_files_properties(${MACOSX_ICON} PROPERTIES MACOSX_PACKAGE_LOCATION Resources) - target_sources(yuzu PRIVATE ${MACOSX_ICON}) + # Xcode will automatically generate the Assets.car and icns file for us. + set(_dist "${CMAKE_SOURCE_DIR}/dist") + if (CMAKE_GENERATOR MATCHES "Xcode") + set(_icons "${_dist}/eden.icon") - # Liquid glass - set(MACOSX_LIQUID_GLASS_ICON "${CMAKE_SOURCE_DIR}/dist/Assets.car") - set_source_files_properties(${MACOSX_LIQUID_GLASS_ICON} PROPERTIES MACOSX_PACKAGE_LOCATION Resources) - target_sources(yuzu PRIVATE ${MACOSX_LIQUID_GLASS_ICON}) + set_target_properties(eden PROPERTIES + XCODE_ATTRIBUTE_ASSETCATALOG_COMPILER_APPICON_NAME eden + MACOSX_BUNDLE_ICON_FILE eden + # Also force xcode to manage signing for us. + XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED ON + XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED ON) + # Otherwise, we'll use our own. + else() + set(_icons "${_dist}/eden.icns" "${_dist}/Assets.car") + endif() + + set_source_files_properties(${_icons} PROPERTIES + MACOSX_PACKAGE_LOCATION Resources) + target_sources(yuzu PRIVATE ${_icons}) set_target_properties(yuzu PROPERTIES MACOSX_BUNDLE TRUE) set_target_properties(yuzu PROPERTIES MACOSX_BUNDLE_INFO_PLIST ${CMAKE_CURRENT_SOURCE_DIR}/Info.plist) @@ -455,13 +454,15 @@ endif() if (NOT MSVC AND (APPLE OR NOT YUZU_STATIC_BUILD)) # needed for vma target_compile_options(yuzu PRIVATE - -Wno-conversion - -Wno-unused-variable - -Wno-unused-parameter - -Wno-missing-field-initializers) + $<$:-Wno-conversion> + $<$:-Wno-unused-variable> + $<$:-Wno-unused-parameter> + $<$:-Wno-missing-field-initializers>) endif() # Remember that the linker is incredibly stupid. target_link_libraries(yuzu PRIVATE OpenSSL::SSL OpenSSL::Crypto SDL2::SDL2) +set_target_properties(yuzu PROPERTIES UNITY_BUILD OFF) + create_target_directory_groups(yuzu) diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp index d1bcac12aa..d462c4c0c6 100644 --- a/src/yuzu/configuration/configure_motion_touch.cpp +++ b/src/yuzu/configuration/configure_motion_touch.cpp @@ -37,17 +37,17 @@ CalibrationConfigurationDialog::CalibrationConfigurationDialog(QWidget* parent, using namespace InputCommon::CemuhookUDP; job = std::make_unique( host, port, - [this](CalibrationConfigurationJob::Status status) { + [this](CalibrationConfigurationJob::CalibrationStatus status) { QMetaObject::invokeMethod(this, [status, this] { QString text; switch (status) { - case CalibrationConfigurationJob::Status::Ready: + case CalibrationConfigurationJob::CalibrationStatus::Ready: text = tr("Touch the top left corner
of your touchpad."); break; - case CalibrationConfigurationJob::Status::Stage1Completed: + case CalibrationConfigurationJob::CalibrationStatus::Stage1Completed: text = tr("Now touch the bottom right corner
of your touchpad."); break; - case CalibrationConfigurationJob::Status::Completed: + case CalibrationConfigurationJob::CalibrationStatus::Completed: text = tr("Configuration completed!"); break; default: @@ -55,7 +55,7 @@ CalibrationConfigurationDialog::CalibrationConfigurationDialog(QWidget* parent, } UpdateLabelText(text); }); - if (status == CalibrationConfigurationJob::Status::Completed) { + if (status == CalibrationConfigurationJob::CalibrationStatus::Completed) { QMetaObject::invokeMethod(this, [this] { UpdateButtonText(tr("OK")); }); } }, diff --git a/src/yuzu/user_data_migration.h b/src/yuzu/user_data_migration.h index df8057eaa5..3b404a1a14 100644 --- a/src/yuzu/user_data_migration.h +++ b/src/yuzu/user_data_migration.h @@ -8,7 +8,7 @@ #pragma once #include -#include "../yuzu/migration_worker.h" +#include "yuzu/migration_worker.h" // TODO(crueter): Quick implementation class UserDataMigrator { diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt index a1f16be75c..4aa90fc91a 100644 --- a/src/yuzu_cmd/CMakeLists.txt +++ b/src/yuzu_cmd/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project # SPDX-License-Identifier: GPL-3.0-or-later # SPDX-FileCopyrightText: 2018 yuzu Emulator Project @@ -55,13 +55,15 @@ if(WIN32) endif() endif() -create_target_directory_groups(yuzu-cmd) - # needed for vma if (NOT MSVC) target_compile_options(yuzu-cmd PRIVATE - -Wno-conversion - -Wno-unused-variable - -Wno-unused-parameter - -Wno-missing-field-initializers) + $<$:-Wno-conversion> + $<$:-Wno-unused-variable> + $<$:-Wno-unused-parameter> + $<$:-Wno-missing-field-initializers>) endif() + +set_target_properties(yuzu-cmd PROPERTIES UNITY_BUILD OFF) + +create_target_directory_groups(yuzu-cmd) diff --git a/tools/gendynarm.cpp b/tools/gendynarm.cpp new file mode 100644 index 0000000000..3d1588e7e8 --- /dev/null +++ b/tools/gendynarm.cpp @@ -0,0 +1,1646 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace mcl { +template +constexpr std::size_t bitsizeof = CHAR_BIT * sizeof(T); +} +namespace mcl::bit { +template +inline size_t count_ones(T x) { + return std::bitset>(x).count(); +} +} +template +inline consteval std::array StringToArray(const char (&str)[N + 1]) { + std::array result{}; + for (size_t i = 0; i < N; i++) { + result[i] = str[i]; + } + return result; +} +using opcode_type = uint32_t; +constexpr size_t opcode_bitsize = mcl::bitsizeof; +static opcode_type GetMaskAndExpect(std::string bitstring) { + const auto one = opcode_type(1); + opcode_type mask = 0; + for (size_t i = 0; i < opcode_bitsize; i++) { + const size_t bit_position = opcode_bitsize - i - 1; + switch (bitstring[i]) { + case '0': mask |= one << bit_position; break; + case '1': mask |= one << bit_position; break; + default: break; + } + } + return mask; +} +struct SortingInfo { + const char *first; + uint32_t second; + const char *inst_final; +}; +int main(int argc, char *argv[]) { + auto which = std::string{argv[1]}; + std::printf( +"// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project\n" +"// SPDX-License-Identifier: GPL-3.0-or-later\n" +"// DO NOT REORDER\n"); + if (which == "-arm") { + std::vector list = { +#define INST(fn, name, bitstring) { name, GetMaskAndExpect(bitstring), "INST(" #fn ", " #name ", " #bitstring ")" }, +// Barrier instructions +INST(arm_DMB, "DMB", "1111010101111111111100000101oooo") // v7 +INST(arm_DSB, "DSB", "1111010101111111111100000100oooo") // v7 +INST(arm_ISB, "ISB", "1111010101111111111100000110oooo") // v7 + +// Branch instructions +INST(arm_BLX_imm, "BLX (imm)", "1111101hvvvvvvvvvvvvvvvvvvvvvvvv") // v5 +INST(arm_BLX_reg, "BLX (reg)", "cccc000100101111111111110011mmmm") // v5 +INST(arm_B, "B", "cccc1010vvvvvvvvvvvvvvvvvvvvvvvv") // v1 +INST(arm_BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") // v1 +INST(arm_BX, "BX", "cccc000100101111111111110001mmmm") // v4T +INST(arm_BXJ, "BXJ", "cccc000100101111111111110010mmmm") // v5J + +// CRC32 instructions +INST(arm_CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") // v8 +INST(arm_CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") // v8 + +// Coprocessor instructions +INST(arm_CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2 (CDP2: v5) +INST(arm_LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2 (LDC2: v5) +INST(arm_MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") // v2 (MCR2: v5) +INST(arm_MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6) +INST(arm_MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") // v2 (MRC2: v5) +INST(arm_MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6) +INST(arm_STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2 (STC2: v5) + +// Data Processing instructions +INST(arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_ADC_reg, "ADC (reg)", "cccc0000101Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_ADC_rsr, "ADC (rsr)", "cccc0000101Snnnnddddssss0rr1mmmm") // v1 +INST(arm_ADD_imm, "ADD (imm)", "cccc0010100Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_ADD_reg, "ADD (reg)", "cccc0000100Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_ADD_rsr, "ADD (rsr)", "cccc0000100Snnnnddddssss0rr1mmmm") // v1 +INST(arm_AND_imm, "AND (imm)", "cccc0010000Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_AND_reg, "AND (reg)", "cccc0000000Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_AND_rsr, "AND (rsr)", "cccc0000000Snnnnddddssss0rr1mmmm") // v1 +INST(arm_BIC_imm, "BIC (imm)", "cccc0011110Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_BIC_reg, "BIC (reg)", "cccc0001110Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_BIC_rsr, "BIC (rsr)", "cccc0001110Snnnnddddssss0rr1mmmm") // v1 +INST(arm_CMN_imm, "CMN (imm)", "cccc00110111nnnn0000rrrrvvvvvvvv") // v1 +INST(arm_CMN_reg, "CMN (reg)", "cccc00010111nnnn0000vvvvvrr0mmmm") // v1 +INST(arm_CMN_rsr, "CMN (rsr)", "cccc00010111nnnn0000ssss0rr1mmmm") // v1 +INST(arm_CMP_imm, "CMP (imm)", "cccc00110101nnnn0000rrrrvvvvvvvv") // v1 +INST(arm_CMP_reg, "CMP (reg)", "cccc00010101nnnn0000vvvvvrr0mmmm") // v1 +INST(arm_CMP_rsr, "CMP (rsr)", "cccc00010101nnnn0000ssss0rr1mmmm") // v1 +INST(arm_EOR_imm, "EOR (imm)", "cccc0010001Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_EOR_reg, "EOR (reg)", "cccc0000001Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_EOR_rsr, "EOR (rsr)", "cccc0000001Snnnnddddssss0rr1mmmm") // v1 +INST(arm_MOV_imm, "MOV (imm)", "cccc0011101S0000ddddrrrrvvvvvvvv") // v1 +INST(arm_MOV_reg, "MOV (reg)", "cccc0001101S0000ddddvvvvvrr0mmmm") // v1 +INST(arm_MOV_rsr, "MOV (rsr)", "cccc0001101S0000ddddssss0rr1mmmm") // v1 +INST(arm_MVN_imm, "MVN (imm)", "cccc0011111S0000ddddrrrrvvvvvvvv") // v1 +INST(arm_MVN_reg, "MVN (reg)", "cccc0001111S0000ddddvvvvvrr0mmmm") // v1 +INST(arm_MVN_rsr, "MVN (rsr)", "cccc0001111S0000ddddssss0rr1mmmm") // v1 +INST(arm_ORR_imm, "ORR (imm)", "cccc0011100Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_ORR_reg, "ORR (reg)", "cccc0001100Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_ORR_rsr, "ORR (rsr)", "cccc0001100Snnnnddddssss0rr1mmmm") // v1 +INST(arm_RSB_imm, "RSB (imm)", "cccc0010011Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_RSB_reg, "RSB (reg)", "cccc0000011Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_RSB_rsr, "RSB (rsr)", "cccc0000011Snnnnddddssss0rr1mmmm") // v1 +INST(arm_RSC_imm, "RSC (imm)", "cccc0010111Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_RSC_reg, "RSC (reg)", "cccc0000111Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_RSC_rsr, "RSC (rsr)", "cccc0000111Snnnnddddssss0rr1mmmm") // v1 +INST(arm_SBC_imm, "SBC (imm)", "cccc0010110Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_SBC_reg, "SBC (reg)", "cccc0000110Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_SBC_rsr, "SBC (rsr)", "cccc0000110Snnnnddddssss0rr1mmmm") // v1 +INST(arm_SUB_imm, "SUB (imm)", "cccc0010010Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_SUB_reg, "SUB (reg)", "cccc0000010Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_SUB_rsr, "SUB (rsr)", "cccc0000010Snnnnddddssss0rr1mmmm") // v1 +INST(arm_TEQ_imm, "TEQ (imm)", "cccc00110011nnnn0000rrrrvvvvvvvv") // v1 +INST(arm_TEQ_reg, "TEQ (reg)", "cccc00010011nnnn0000vvvvvrr0mmmm") // v1 +INST(arm_TEQ_rsr, "TEQ (rsr)", "cccc00010011nnnn0000ssss0rr1mmmm") // v1 +INST(arm_TST_imm, "TST (imm)", "cccc00110001nnnn0000rrrrvvvvvvvv") // v1 +INST(arm_TST_reg, "TST (reg)", "cccc00010001nnnn0000vvvvvrr0mmmm") // v1 +INST(arm_TST_rsr, "TST (rsr)", "cccc00010001nnnn0000ssss0rr1mmmm") // v1 + +// Exception Generating instructions +INST(arm_BKPT, "BKPT", "cccc00010010vvvvvvvvvvvv0111vvvv") // v5 +INST(arm_SVC, "SVC", "cccc1111vvvvvvvvvvvvvvvvvvvvvvvv") // v1 +INST(arm_UDF, "UDF", "111001111111------------1111----") + +// Extension instructions +INST(arm_SXTB, "SXTB", "cccc011010101111ddddrr000111mmmm") // v6 +INST(arm_SXTB16, "SXTB16", "cccc011010001111ddddrr000111mmmm") // v6 +INST(arm_SXTH, "SXTH", "cccc011010111111ddddrr000111mmmm") // v6 +INST(arm_SXTAB, "SXTAB", "cccc01101010nnnnddddrr000111mmmm") // v6 +INST(arm_SXTAB16, "SXTAB16", "cccc01101000nnnnddddrr000111mmmm") // v6 +INST(arm_SXTAH, "SXTAH", "cccc01101011nnnnddddrr000111mmmm") // v6 +INST(arm_UXTB, "UXTB", "cccc011011101111ddddrr000111mmmm") // v6 +INST(arm_UXTB16, "UXTB16", "cccc011011001111ddddrr000111mmmm") // v6 +INST(arm_UXTH, "UXTH", "cccc011011111111ddddrr000111mmmm") // v6 +INST(arm_UXTAB, "UXTAB", "cccc01101110nnnnddddrr000111mmmm") // v6 +INST(arm_UXTAB16, "UXTAB16", "cccc01101100nnnnddddrr000111mmmm") // v6 +INST(arm_UXTAH, "UXTAH", "cccc01101111nnnnddddrr000111mmmm") // v6 + +// Hint instructions +INST(arm_PLD_imm, "PLD (imm)", "11110101uz01nnnn1111iiiiiiiiiiii") // v5E for PLD; v7 for PLDW +INST(arm_PLD_reg, "PLD (reg)", "11110111uz01nnnn1111iiiiitt0mmmm") // v5E for PLD; v7 for PLDW +INST(arm_SEV, "SEV", "----0011001000001111000000000100") // v6K +INST(arm_SEVL, "SEVL", "----0011001000001111000000000101") // v8 +INST(arm_WFE, "WFE", "----0011001000001111000000000010") // v6K +INST(arm_WFI, "WFI", "----0011001000001111000000000011") // v6K +INST(arm_YIELD, "YIELD", "----0011001000001111000000000001") // v6K +INST(arm_NOP, "Reserved Hint", "----0011001000001111------------") +INST(arm_NOP, "Reserved Hint", "----001100100000111100000000----") + +// Synchronization Primitive instructions +INST(arm_CLREX, "CLREX", "11110101011111111111000000011111") // v6K +INST(arm_SWP, "SWP", "cccc00010000nnnntttt00001001uuuu") // v2S (v6: Deprecated) +INST(arm_SWPB, "SWPB", "cccc00010100nnnntttt00001001uuuu") // v2S (v6: Deprecated) +INST(arm_STL, "STL", "cccc00011000nnnn111111001001tttt") // v8 +INST(arm_STLEX, "STLEX", "cccc00011000nnnndddd11101001tttt") // v8 +INST(arm_STREX, "STREX", "cccc00011000nnnndddd11111001mmmm") // v6 +INST(arm_LDA, "LDA", "cccc00011001nnnndddd110010011111") // v8 +INST(arm_LDAEX, "LDAEX", "cccc00011001nnnndddd111010011111") // v8 +INST(arm_LDREX, "LDREX", "cccc00011001nnnndddd111110011111") // v6 +INST(arm_STLEXD, "STLEXD", "cccc00011010nnnndddd11101001mmmm") // v8 +INST(arm_STREXD, "STREXD", "cccc00011010nnnndddd11111001mmmm") // v6K +INST(arm_LDAEXD, "LDAEXD", "cccc00011011nnnndddd111010011111") // v8 +INST(arm_LDREXD, "LDREXD", "cccc00011011nnnndddd111110011111") // v6K +INST(arm_STLB, "STLB", "cccc00011100nnnn111111001001tttt") // v8 +INST(arm_STLEXB, "STLEXB", "cccc00011100nnnndddd11101001mmmm") // v8 +INST(arm_STREXB, "STREXB", "cccc00011100nnnndddd11111001mmmm") // v6K +INST(arm_LDAB, "LDAB", "cccc00011101nnnndddd110010011111") // v8 +INST(arm_LDAEXB, "LDAEXB", "cccc00011101nnnndddd111010011111") // v8 +INST(arm_LDREXB, "LDREXB", "cccc00011101nnnndddd111110011111") // v6K +INST(arm_STLH, "STLH", "cccc00011110nnnn111111001001mmmm") // v8 +INST(arm_STLEXH, "STLEXH", "cccc00011110nnnndddd11101001mmmm") // v8 +INST(arm_STREXH, "STREXH", "cccc00011110nnnndddd11111001mmmm") // v6K +INST(arm_LDAH, "LDAH", "cccc00011111nnnndddd110010011111") // v8 +INST(arm_LDAEXH, "LDAEXH", "cccc00011111nnnndddd111010011111") // v8 +INST(arm_LDREXH, "LDREXH", "cccc00011111nnnndddd111110011111") // v6K + +// Load/Store instructions +INST(arm_LDRBT, "LDRBT (A1)", "----0100-111--------------------") // v1 +INST(arm_LDRBT, "LDRBT (A2)", "----0110-111---------------0----") // v1 +INST(arm_LDRHT, "LDRHT (A1)", "----0000-111------------1011----") // v6T2 +INST(arm_LDRHT, "LDRHT (A1)", "----0000-1111111--------1011----") // v6T2 +INST(arm_LDRHT, "LDRHT (A2)", "----0000-011--------00001011----") // v6T2 +INST(arm_LDRSBT, "LDRSBT (A1)", "----0000-111------------1101----") // v6T2 +INST(arm_LDRSBT, "LDRSBT (A2)", "----0000-011--------00001101----") // v6T2 +INST(arm_LDRSHT, "LDRSHT (A1)", "----0000-111------------1111----") // v6T2 +INST(arm_LDRSHT, "LDRSHT (A2)", "----0000-011--------00001111----") // v6T2 +INST(arm_LDRT, "LDRT (A1)", "----0100-011--------------------") // v1 +INST(arm_LDRT, "LDRT (A2)", "----0110-011---------------0----") // v1 +INST(arm_STRBT, "STRBT (A1)", "----0100-110--------------------") // v1 +INST(arm_STRBT, "STRBT (A2)", "----0110-110---------------0----") // v1 +INST(arm_STRHT, "STRHT (A1)", "----0000-110------------1011----") // v6T2 +INST(arm_STRHT, "STRHT (A2)", "----0000-010--------00001011----") // v6T2 +INST(arm_STRT, "STRT (A1)", "----0100-010--------------------") // v1 +INST(arm_STRT, "STRT (A2)", "----0110-010---------------0----") // v1 +INST(arm_LDR_lit, "LDR (lit)", "cccc0101u0011111ttttvvvvvvvvvvvv") // v1 +INST(arm_LDR_imm, "LDR (imm)", "cccc010pu0w1nnnnttttvvvvvvvvvvvv") // v1 +INST(arm_LDR_reg, "LDR (reg)", "cccc011pu0w1nnnnttttvvvvvrr0mmmm") // v1 +INST(arm_LDRB_lit, "LDRB (lit)", "cccc0101u1011111ttttvvvvvvvvvvvv") // v1 +INST(arm_LDRB_imm, "LDRB (imm)", "cccc010pu1w1nnnnttttvvvvvvvvvvvv") // v1 +INST(arm_LDRB_reg, "LDRB (reg)", "cccc011pu1w1nnnnttttvvvvvrr0mmmm") // v1 +INST(arm_LDRD_lit, "LDRD (lit)", "cccc0001u1001111ttttvvvv1101vvvv") // v5E +INST(arm_LDRD_imm, "LDRD (imm)", "cccc000pu1w0nnnnttttvvvv1101vvvv") // v5E +INST(arm_LDRD_reg, "LDRD (reg)", "cccc000pu0w0nnnntttt00001101mmmm") // v5E +INST(arm_LDRH_lit, "LDRH (lit)", "cccc000pu1w11111ttttvvvv1011vvvv") // v4 +INST(arm_LDRH_imm, "LDRH (imm)", "cccc000pu1w1nnnnttttvvvv1011vvvv") // v4 +INST(arm_LDRH_reg, "LDRH (reg)", "cccc000pu0w1nnnntttt00001011mmmm") // v4 +INST(arm_LDRSB_lit, "LDRSB (lit)", "cccc0001u1011111ttttvvvv1101vvvv") // v4 +INST(arm_LDRSB_imm, "LDRSB (imm)", "cccc000pu1w1nnnnttttvvvv1101vvvv") // v4 +INST(arm_LDRSB_reg, "LDRSB (reg)", "cccc000pu0w1nnnntttt00001101mmmm") // v4 +INST(arm_LDRSH_lit, "LDRSH (lit)", "cccc0001u1011111ttttvvvv1111vvvv") // v4 +INST(arm_LDRSH_imm, "LDRSH (imm)", "cccc000pu1w1nnnnttttvvvv1111vvvv") // v4 +INST(arm_LDRSH_reg, "LDRSH (reg)", "cccc000pu0w1nnnntttt00001111mmmm") // v4 +INST(arm_STR_imm, "STR (imm)", "cccc010pu0w0nnnnttttvvvvvvvvvvvv") // v1 +INST(arm_STR_reg, "STR (reg)", "cccc011pu0w0nnnnttttvvvvvrr0mmmm") // v1 +INST(arm_STRB_imm, "STRB (imm)", "cccc010pu1w0nnnnttttvvvvvvvvvvvv") // v1 +INST(arm_STRB_reg, "STRB (reg)", "cccc011pu1w0nnnnttttvvvvvrr0mmmm") // v1 +INST(arm_STRD_imm, "STRD (imm)", "cccc000pu1w0nnnnttttvvvv1111vvvv") // v5E +INST(arm_STRD_reg, "STRD (reg)", "cccc000pu0w0nnnntttt00001111mmmm") // v5E +INST(arm_STRH_imm, "STRH (imm)", "cccc000pu1w0nnnnttttvvvv1011vvvv") // v4 +INST(arm_STRH_reg, "STRH (reg)", "cccc000pu0w0nnnntttt00001011mmmm") // v4 + +// Load/Store Multiple instructions +INST(arm_LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_LDM_usr, "LDM (usr reg)", "----100--101--------------------") // v1 +INST(arm_LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") // v1 +INST(arm_STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_STMDB, "STMDB", "cccc100100w0nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_STMIB, "STMIB", "cccc100110w0nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_STM_usr, "STM (usr reg)", "----100--100--------------------") // v1 + +// Miscellaneous instructions +INST(arm_BFC, "BFC", "cccc0111110vvvvvddddvvvvv0011111") // v6T2 +INST(arm_BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") // v6T2 +INST(arm_CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") // v5 +INST(arm_MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2 +INST(arm_MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2 +INST(arm_NOP, "NOP", "----0011001000001111000000000000") // v6K +INST(arm_SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2 +INST(arm_SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") // v6 +INST(arm_UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2 + +// Unsigned Sum of Absolute Differences instructions +INST(arm_USAD8, "USAD8", "cccc01111000dddd1111mmmm0001nnnn") // v6 +INST(arm_USADA8, "USADA8", "cccc01111000ddddaaaammmm0001nnnn") // v6 + +// Packing instructions +INST(arm_PKHBT, "PKHBT", "cccc01101000nnnnddddvvvvv001mmmm") // v6K +INST(arm_PKHTB, "PKHTB", "cccc01101000nnnnddddvvvvv101mmmm") // v6K + +// Reversal instructions +INST(arm_RBIT, "RBIT", "cccc011011111111dddd11110011mmmm") // v6T2 +INST(arm_REV, "REV", "cccc011010111111dddd11110011mmmm") // v6 +INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm") // v6 +INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") // v6 + +// Saturation instructions +INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6 +INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6 +INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6 +INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6 + +// Divide instructions +INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") // v7a +INST(arm_UDIV, "UDIV", "cccc01110011dddd1111mmmm0001nnnn") // v7a + +// Multiply (Normal) instructions +INST(arm_MLA, "MLA", "cccc0000001Sddddaaaammmm1001nnnn") // v2 +INST(arm_MLS, "MLS", "cccc00000110ddddaaaammmm1001nnnn") // v6T2 +INST(arm_MUL, "MUL", "cccc0000000Sdddd0000mmmm1001nnnn") // v2 + +// Multiply (Long) instructions +INST(arm_SMLAL, "SMLAL", "cccc0000111Sddddaaaammmm1001nnnn") // v3M +INST(arm_SMULL, "SMULL", "cccc0000110Sddddaaaammmm1001nnnn") // v3M +INST(arm_UMAAL, "UMAAL", "cccc00000100ddddaaaammmm1001nnnn") // v6 +INST(arm_UMLAL, "UMLAL", "cccc0000101Sddddaaaammmm1001nnnn") // v3M +INST(arm_UMULL, "UMULL", "cccc0000100Sddddaaaammmm1001nnnn") // v3M + +// Multiply (Halfword) instructions +INST(arm_SMLALxy, "SMLALXY", "cccc00010100ddddaaaammmm1xy0nnnn") // v5xP +INST(arm_SMLAxy, "SMLAXY", "cccc00010000ddddaaaammmm1xy0nnnn") // v5xP +INST(arm_SMULxy, "SMULXY", "cccc00010110dddd0000mmmm1xy0nnnn") // v5xP + +// Multiply (Word by Halfword) instructions +INST(arm_SMLAWy, "SMLAWY", "cccc00010010ddddaaaammmm1y00nnnn") // v5xP +INST(arm_SMULWy, "SMULWY", "cccc00010010dddd0000mmmm1y10nnnn") // v5xP + +// Multiply (Most Significant Word) instructions +INST(arm_SMMUL, "SMMUL", "cccc01110101dddd1111mmmm00R1nnnn") // v6 +INST(arm_SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") // v6 +INST(arm_SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") // v6 + +// Multiply (Dual) instructions +INST(arm_SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") // v6 +INST(arm_SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") // v6 +INST(arm_SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") // v6 +INST(arm_SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") // v6 +INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") // v6 +INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6 + +// Parallel Add/Subtract (Modulo) instructions +INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6 +INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") // v6 +INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") // v6 +INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6 +INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6 +INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6 +INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6 +INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6 +INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6 +INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6 +INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6 +INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6 + +// Parallel Add/Subtract (Saturating) instructions +INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6 +INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6 +INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6 +INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6 +INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6 +INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6 +INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6 +INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6 +INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6 +INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6 +INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6 +INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6 + +// Parallel Add/Subtract (Halving) instructions +INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6 +INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6 +INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") // v6 +INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") // v6 +INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6 +INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6 +INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6 +INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6 +INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") // v6 +INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") // v6 +INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6 +INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6 + +// Saturated Add/Subtract instructions +INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP +INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") // v5xP +INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP +INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP + +// Status Register Access instructions +INST(arm_CPS, "CPS", "111100010000---00000000---0-----") // v6 +INST(arm_SETEND, "SETEND", "1111000100000001000000e000000000") // v6 +INST(arm_MRS, "MRS", "cccc000100001111dddd000000000000") // v3 +INST(arm_MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") // v3 +INST(arm_MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") // v3 +INST(arm_RFE, "RFE", "1111100--0-1----0000101000000000") // v6 +INST(arm_SRS, "SRS", "1111100--1-0110100000101000-----") // v6 +#undef INST + }; + // If a matcher has more bits in its mask it is more specific, so it should come first. + std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + return mcl::bit::count_ones(matcher1.second) > mcl::bit::count_ones(matcher2.second); + }); + for (auto const& e : list) + printf("%s\n", e.inst_final); + } else if (which == "-asimd") { + std::vector table = { +#define INST(fn, name, bitstring) { name, GetMaskAndExpect(bitstring), "INST(" #fn ", " #name ", " #bitstring ")" }, +// Three registers of the same length +INST(asimd_VHADD, "VHADD", "1111001U0Dzznnnndddd0000NQM0mmmm") // ASIMD +INST(asimd_VQADD, "VQADD", "1111001U0Dzznnnndddd0000NQM1mmmm") // ASIMD +INST(asimd_VRHADD, "VRHADD", "1111001U0Dzznnnndddd0001NQM0mmmm") // ASIMD +INST(asimd_VAND_reg, "VAND (register)", "111100100D00nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VBIC_reg, "VBIC (register)", "111100100D01nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VORR_reg, "VORR (register)", "111100100D10nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VORN_reg, "VORN (register)", "111100100D11nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VEOR_reg, "VEOR (register)", "111100110D00nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VBSL, "VBSL", "111100110D01nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VBIT, "VBIT", "111100110D10nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VBIF, "VBIF", "111100110D11nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VHSUB, "VHSUB", "1111001U0Dzznnnndddd0010NQM0mmmm") // ASIMD +INST(asimd_VQSUB, "VQSUB", "1111001U0Dzznnnndddd0010NQM1mmmm") // ASIMD +INST(asimd_VCGT_reg, "VCGT (register)", "1111001U0Dzznnnndddd0011NQM0mmmm") // ASIMD +INST(asimd_VCGE_reg, "VCGE (register)", "1111001U0Dzznnnndddd0011NQM1mmmm") // ASIMD +INST(asimd_VSHL_reg, "VSHL (register)", "1111001U0Dzznnnndddd0100NQM0mmmm") // ASIMD +INST(asimd_VQSHL_reg, "VQSHL (register)", "1111001U0Dzznnnndddd0100NQM1mmmm") // ASIMD +INST(asimd_VRSHL, "VRSHL", "1111001U0Dzznnnndddd0101NQM0mmmm") // ASIMD +//INST(asimd_VQRSHL, "VQRSHL", "1111001U0-CC--------0101---1----") // ASIMD +INST(asimd_VMAX, "VMAX/VMIN (integer)", "1111001U0Dzznnnnmmmm0110NQMommmm") // ASIMD +INST(asimd_VABD, "VABD", "1111001U0Dzznnnndddd0111NQM0mmmm") // ASIMD +INST(asimd_VABA, "VABA", "1111001U0Dzznnnndddd0111NQM1mmmm") // ASIMD +INST(asimd_VADD_int, "VADD (integer)", "111100100Dzznnnndddd1000NQM0mmmm") // ASIMD +INST(asimd_VSUB_int, "VSUB (integer)", "111100110Dzznnnndddd1000NQM0mmmm") // ASIMD +INST(asimd_VTST, "VTST", "111100100Dzznnnndddd1000NQM1mmmm") // ASIMD +INST(asimd_VCEQ_reg, "VCEG (register)", "111100110Dzznnnndddd1000NQM1mmmm") // ASIMD +INST(asimd_VMLA, "VMLA/VMLS", "1111001o0Dzznnnndddd1001NQM0mmmm") // ASIMD +INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd1001NQM1mmmm") // ASIMD +INST(asimd_VPMAX_int, "VPMAX/VPMIN (integer)", "1111001U0Dzznnnndddd1010NQMommmm") // ASIMD +INST(v8_VMAXNM, "VMAXNM", "111100110D0znnnndddd1111NQM1mmmm") // v8 +INST(v8_VMINNM, "VMINNM", "111100110D1znnnndddd1111NQM1mmmm") // v8 +INST(asimd_VQDMULH, "VQDMULH", "111100100Dzznnnndddd1011NQM0mmmm") // ASIMD +INST(asimd_VQRDMULH, "VQRDMULH", "111100110Dzznnnndddd1011NQM0mmmm") // ASIMD +INST(asimd_VPADD, "VPADD", "111100100Dzznnnndddd1011NQM1mmmm") // ASIMD +INST(asimd_VFMA, "VFMA", "111100100D0znnnndddd1100NQM1mmmm") // ASIMD +INST(asimd_VFMS, "VFMS", "111100100D1znnnndddd1100NQM1mmmm") // ASIMD +INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD +INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD +INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") // ASIMD +INST(asimd_VABD_float, "VABD (floating-point)", "111100110D1znnnndddd1101NQM0mmmm") // ASIMD +INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100D0znnnndddd1101NQM1mmmm") // ASIMD +INST(asimd_VMLS_float, "VMLS (floating-point)", "111100100D1znnnndddd1101NQM1mmmm") // ASIMD +INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD +INST(asimd_VCEQ_reg_float, "VCEQ (register)", "111100100D0znnnndddd1110NQM0mmmm") // ASIMD +INST(asimd_VCGE_reg_float, "VCGE (register)", "111100110D0znnnndddd1110NQM0mmmm") // ASIMD +INST(asimd_VCGT_reg_float, "VCGT (register)", "111100110D1znnnndddd1110NQM0mmmm") // ASIMD +INST(asimd_VACGE, "VACGE", "111100110Doznnnndddd1110NQM1mmmm") // ASIMD +INST(asimd_VMAX_float, "VMAX (floating-point)", "111100100D0znnnndddd1111NQM0mmmm") // ASIMD +INST(asimd_VMIN_float, "VMIN (floating-point)", "111100100D1znnnndddd1111NQM0mmmm") // ASIMD +INST(asimd_VPMAX_float, "VPMAX (floating-point)", "111100110D0znnnndddd1111NQM0mmmm") // ASIMD +INST(asimd_VPMIN_float, "VPMIN (floating-point)", "111100110D1znnnndddd1111NQM0mmmm") // ASIMD +INST(asimd_VRECPS, "VRECPS", "111100100D0znnnndddd1111NQM1mmmm") // ASIMD +INST(asimd_VRSQRTS, "VRSQRTS", "111100100D1znnnndddd1111NQM1mmmm") // ASIMD +INST(v8_SHA256H, "SHA256H", "111100110D00nnnndddd1100NQM0mmmm") // v8 +INST(v8_SHA256H2, "SHA256H2", "111100110D01nnnndddd1100NQM0mmmm") // v8 +INST(v8_SHA256SU1, "SHA256SU1", "111100110D10nnnndddd1100NQM0mmmm") // v8 + +// Three registers of different lengths +INST(asimd_VADDL, "VADDL/VADDW", "1111001U1Dzznnnndddd000oN0M0mmmm") // ASIMD +INST(asimd_VSUBL, "VSUBL/VSUBW", "1111001U1Dzznnnndddd001oN0M0mmmm") // ASIMD +//INST(asimd_VADDHN, "VADDHN", "111100101-----------0100-0-0----") // ASIMD +//INST(asimd_VRADDHN, "VRADDHN", "111100111-----------0100-0-0----") // ASIMD +INST(asimd_VABAL, "VABAL", "1111001U1Dzznnnndddd0101N0M0mmmm") // ASIMD +//INST(asimd_VSUBHN, "VSUBHN", "111100101-----------0110-0-0----") // ASIMD +//INST(asimd_VRSUBHN, "VRSUBHN", "111100111-----------0110-0-0----") // ASIMD +INST(asimd_VABDL, "VABDL", "1111001U1Dzznnnndddd0111N0M0mmmm") // ASIMD +INST(asimd_VMLAL, "VMLAL/VMLSL", "1111001U1Dzznnnndddd10o0N0M0mmmm") // ASIMD +//INST(asimd_VQDMLAL, "VQDMLAL", "111100101-----------10-1-0-0----") // ASIMD +INST(asimd_VMULL, "VMULL", "1111001U1Dzznnnndddd11P0N0M0mmmm") // ASIMD +//INST(asimd_VQDMULL, "VQDMULL", "111100101-----------1101-0-0----") // ASIMD + +// Two registers and a scalar +INST(asimd_VMLA_scalar, "VMLA (scalar)", "1111001Q1Dzznnnndddd0o0FN1M0mmmm") // ASIMD +INST(asimd_VMLAL_scalar, "VMLAL (scalar)", "1111001U1dzznnnndddd0o10N1M0mmmm") // ASIMD +//INST(asimd_VQDMLAL_scalar, "VQDMLAL/VQDMLSL (scalar)", "111100101-BB--------0x11-1-0----") // ASIMD +INST(asimd_VMUL_scalar, "VMUL (scalar)", "1111001Q1Dzznnnndddd100FN1M0mmmm") // ASIMD +INST(asimd_VMULL_scalar, "VMULL (scalar)", "1111001U1Dzznnnndddd1010N1M0mmmm") // ASIMD +INST(asimd_VQDMULL_scalar, "VQDMULL (scalar)", "111100101Dzznnnndddd1011N1M0mmmm") // ASIMD +INST(asimd_VQDMULH_scalar, "VQDMULH (scalar)", "1111001Q1Dzznnnndddd1100N1M0mmmm") // ASIMD +INST(asimd_VQRDMULH_scalar, "VQRDMULH (scalar)", "1111001Q1Dzznnnndddd1101N1M0mmmm") // ASIMD + +// Two registers and a shift amount +INST(asimd_SHR, "SHR", "1111001U1Diiiiiidddd0000LQM1mmmm") // ASIMD +INST(asimd_SRA, "SRA", "1111001U1Diiiiiidddd0001LQM1mmmm") // ASIMD +INST(asimd_VRSHR, "VRSHR", "1111001U1Diiiiiidddd0010LQM1mmmm") // ASIMD +INST(asimd_VRSRA, "VRSRA", "1111001U1Diiiiiidddd0011LQM1mmmm") // ASIMD +INST(asimd_VSRI, "VSRI", "111100111Diiiiiidddd0100LQM1mmmm") // ASIMD +INST(asimd_VSHL, "VSHL", "111100101Diiiiiidddd0101LQM1mmmm") // ASIMD +INST(asimd_VSLI, "VSLI", "111100111Diiiiiidddd0101LQM1mmmm") // ASIMD +INST(asimd_VQSHL, "VQSHL" , "1111001U1Diiiiiidddd011oLQM1mmmm") // ASIMD +INST(asimd_VSHRN, "VSHRN", "111100101Diiiiiidddd100000M1mmmm") // ASIMD +INST(asimd_VRSHRN, "VRSHRN", "111100101Diiiiiidddd100001M1mmmm") // ASIMD +INST(asimd_VQSHRUN, "VQSHRUN", "111100111Diiiiiidddd100000M1mmmm") // ASIMD +INST(asimd_VQRSHRUN, "VQRSHRUN", "111100111Diiiiiidddd100001M1mmmm") // ASIMD +INST(asimd_VQSHRN, "VQSHRN", "1111001U1Diiiiiidddd100100M1mmmm") // ASIMD +INST(asimd_VQRSHRN, "VQRSHRN", "1111001U1Diiiiiidddd100101M1mmmm") // ASIMD +INST(asimd_VSHLL, "VSHLL", "1111001U1Diiiiiidddd101000M1mmmm") // ASIMD +INST(asimd_VCVT_fixed, "VCVT (fixed-point)", "1111001U1Diiiiiidddd111o0QM1mmmm") // ASIMD + +// Two registers, miscellaneous +INST(asimd_VREV, "VREV{16,32,64}", "111100111D11zz00dddd000ooQM0mmmm") // ASIMD +INST(asimd_VPADDL, "VPADDL", "111100111D11zz00dddd0010oQM0mmmm") // ASIMD +INST(asimd_VCLS, "VCLS", "111100111D11zz00dddd01000QM0mmmm") // ASIMD +INST(asimd_VCLZ, "VCLZ", "111100111D11zz00dddd01001QM0mmmm") // ASIMD +INST(asimd_VCNT, "VCNT", "111100111D11zz00dddd01010QM0mmmm") // ASIMD +INST(asimd_VMVN_reg, "VMVN_reg", "111100111D11zz00dddd01011QM0mmmm") // ASIMD +INST(asimd_VPADAL, "VPADAL", "111100111D11zz00dddd0110oQM0mmmm") // ASIMD +INST(asimd_VQABS, "VQABS", "111100111D11zz00dddd01110QM0mmmm") // ASIMD +INST(asimd_VQNEG, "VQNEG", "111100111D11zz00dddd01111QM0mmmm") // ASIMD +INST(asimd_VCGT_zero, "VCGT (zero)", "111100111D11zz01dddd0F000QM0mmmm") // ASIMD +INST(asimd_VCGE_zero, "VCGE (zero)", "111100111D11zz01dddd0F001QM0mmmm") // ASIMD +INST(asimd_VCEQ_zero, "VCEQ (zero)", "111100111D11zz01dddd0F010QM0mmmm") // ASIMD +INST(asimd_VCLE_zero, "VCLE (zero)", "111100111D11zz01dddd0F011QM0mmmm") // ASIMD +INST(asimd_VCLT_zero, "VCLT (zero)", "111100111D11zz01dddd0F100QM0mmmm") // ASIMD +INST(arm_UDF, "UNALLOCATED", "111100111-11--01----01101--0----") // v8 +INST(asimd_VABS, "VABS", "111100111D11zz01dddd0F110QM0mmmm") // ASIMD +INST(asimd_VNEG, "VNEG", "111100111D11zz01dddd0F111QM0mmmm") // ASIMD +INST(asimd_VSWP, "VSWP", "111100111D110010dddd00000QM0mmmm") // ASIMD +INST(arm_UDF, "UNALLOCATED", "111100111-11--10----00000--0----") // ASIMD +INST(asimd_VTRN, "VTRN", "111100111D11zz10dddd00001QM0mmmm") // ASIMD +INST(asimd_VUZP, "VUZP", "111100111D11zz10dddd00010QM0mmmm") // ASIMD +INST(asimd_VZIP, "VZIP", "111100111D11zz10dddd00011QM0mmmm") // ASIMD +INST(asimd_VMOVN, "VMOVN", "111100111D11zz10dddd001000M0mmmm") // ASIMD +INST(asimd_VQMOVUN, "VQMOVUN", "111100111D11zz10dddd001001M0mmmm") // ASIMD +INST(asimd_VQMOVN, "VQMOVN", "111100111D11zz10dddd00101oM0mmmm") // ASIMD +INST(asimd_VSHLL_max, "VSHLL_max", "111100111D11zz10dddd001100M0mmmm") // ASIMD +INST(v8_VRINTN, "VRINTN", "111100111D11zz10dddd01000QM0mmmm") // v8 +INST(v8_VRINTX, "VRINTX", "111100111D11zz10dddd01001QM0mmmm") // v8 +INST(v8_VRINTA, "VRINTA", "111100111D11zz10dddd01010QM0mmmm") // v8 +INST(v8_VRINTZ, "VRINTZ", "111100111D11zz10dddd01011QM0mmmm") // v8 +INST(v8_VRINTM, "VRINTM", "111100111D11zz10dddd01101QM0mmmm") // v8 +INST(v8_VRINTP, "VRINTP", "111100111D11zz10dddd01111QM0mmmm") // v8 +INST(asimd_VCVT_half, "VCVT (half-precision)", "111100111D11zz10dddd011o00M0mmmm") // ASIMD +INST(arm_UDF, "UNALLOCATED", "111100111-11--10----011-01-0----") // ASIMD +INST(v8_VCVTA, "VCVTA", "111100111D11zz11dddd0000oQM0mmmm") // v8 +INST(v8_VCVTN, "VCVTN", "111100111D11zz11dddd0001oQM0mmmm") // v8 +INST(v8_VCVTP, "VCVTP", "111100111D11zz11dddd0010oQM0mmmm") // v8 +INST(v8_VCVTM, "VCVTM", "111100111D11zz11dddd0011oQM0mmmm") // v8 +INST(asimd_VRECPE, "VRECPE", "111100111D11zz11dddd010F0QM0mmmm") // ASIMD +INST(asimd_VRSQRTE, "VRSQRTE", "111100111D11zz11dddd010F1QM0mmmm") // ASIMD +INST(asimd_VCVT_integer, "VCVT (integer)", "111100111D11zz11dddd011oUQM0mmmm") // ASIMD + +// Two registers, cryptography +INST(v8_AESE, "AESE", "111100111D11zz00dddd001100M0mmmm") // v8 +INST(v8_AESD, "AESD", "111100111D11zz00dddd001101M0mmmm") // v8 +INST(v8_AESMC, "AESMC", "111100111D11zz00dddd001110M0mmmm") // v8 +INST(v8_AESIMC, "AESIMC", "111100111D11zz00dddd001111M0mmmm") // v8 +INST(arm_UDF, "UNALLOCATED", "111100111-11--01----001010-0----") // v8 +INST(arm_UDF, "UNALLOCATED (SHA1H)", "111100111-11--01----001011-0----") // v8 +INST(arm_UDF, "UNALLOCATED (SHA1SU1)", "111100111-11--10----001110-0----") // v8 +INST(v8_SHA256SU0, "SHA256SU0", "111100111D11zz10dddd001111M0mmmm") // v8 + +// One register and modified immediate +INST(asimd_VMOV_imm, "VBIC, VMOV, VMVN, VORR (immediate)", "1111001a1D000bcdVVVVmmmm0Qo1efgh") // ASIMD + +// Miscellaneous +INST(asimd_VEXT, "VEXT", "111100101D11nnnnddddiiiiNQM0mmmm") // ASIMD +INST(asimd_VTBL, "VTBL", "111100111D11nnnndddd10zzN0M0mmmm") // ASIMD +INST(asimd_VTBX, "VTBX", "111100111D11nnnndddd10zzN1M0mmmm") // ASIMD +INST(asimd_VDUP_scalar, "VDUP (scalar)", "111100111D11iiiidddd11000QM0mmmm") // ASIMD +INST(arm_UDF, "UNALLOCATED", "111100111-11--------11-----0----") // ASIMD + +// Advanced SIMD load/store structures +INST(v8_VST_multiple, "VST{1-4} (multiple)", "111101000D00nnnnddddxxxxzzaammmm") // v8 +INST(v8_VLD_multiple, "VLD{1-4} (multiple)", "111101000D10nnnnddddxxxxzzaammmm") // v8 +INST(arm_UDF, "UNALLOCATED", "111101000--0--------1011--------") // v8 +INST(arm_UDF, "UNALLOCATED", "111101000--0--------11----------") // v8 +INST(arm_UDF, "UNALLOCATED", "111101001-00--------11----------") // v8 +INST(v8_VLD_all_lanes, "VLD{1-4} (all lanes)", "111101001D10nnnndddd11nnzzTammmm") // v8 +INST(v8_VST_single, "VST{1-4} (single)", "111101001D00nnnnddddzzNNaaaammmm") // v8 +INST(v8_VLD_single, "VLD{1-4} (single)", "111101001D10nnnnddddzzNNaaaammmm") // v8 +#undef INST + }; + // Exceptions to the rule of thumb. + const std::set comes_first{ + "VBIC, VMOV, VMVN, VORR (immediate)", + "VEXT", + "VTBL", + "VTBX", + "VDUP (scalar)", + }; + const std::set comes_last{ + "VMLA (scalar)", + "VMLAL (scalar)", + "VQDMLAL/VQDMLSL (scalar)", + "VMUL (scalar)", + "VMULL (scalar)", + "VQDMULL (scalar)", + "VQDMULH (scalar)", + "VQRDMULH (scalar)", + }; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; + }); + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; + }); + // If a matcher has more bits in its mask it is more specific, so it should come first. + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second) > mcl::bit::count_ones(b.second); + }); + for (auto const& e : table) + printf("%s\n", e.inst_final); + } else if (which == "-a64") { + std::vector list = { +#define INST(fn, name, bitstring) { name, GetMaskAndExpect(bitstring), "INST(" #fn ", " #name ", " #bitstring ")" }, +// Data processing - Immediate - PC relative addressing +INST(ADR, "ADR", "0ii10000iiiiiiiiiiiiiiiiiiiddddd") +INST(ADRP, "ADRP", "1ii10000iiiiiiiiiiiiiiiiiiiddddd") + +// Data processing - Immediate - Add/Sub (with tags) +//INST(ADDG, "ADDG", "1001000110iiiiii00IIIInnnnnddddd") // ARMv8.5 +//INST(SUBG, "SUBG", "1101000110iiiiii00IIIInnnnnddddd") // ARMv8.5 + +// Data processing - Immediate - Add/Sub +INST(ADD_imm, "ADD (immediate)", "z0010001ssiiiiiiiiiiiinnnnnddddd") +INST(ADDS_imm, "ADDS (immediate)", "z0110001ssiiiiiiiiiiiinnnnnddddd") +INST(SUB_imm, "SUB (immediate)", "z1010001ssiiiiiiiiiiiinnnnnddddd") +INST(SUBS_imm, "SUBS (immediate)", "z1110001ssiiiiiiiiiiiinnnnnddddd") + +// Data processing - Immediate - Logical +INST(AND_imm, "AND (immediate)", "z00100100Nrrrrrrssssssnnnnnddddd") +INST(ORR_imm, "ORR (immediate)", "z01100100Nrrrrrrssssssnnnnnddddd") +INST(EOR_imm, "EOR (immediate)", "z10100100Nrrrrrrssssssnnnnnddddd") +INST(ANDS_imm, "ANDS (immediate)", "z11100100Nrrrrrrssssssnnnnnddddd") + +// Data processing - Immediate - Move Wide +INST(MOVN, "MOVN", "z00100101ssiiiiiiiiiiiiiiiiddddd") +INST(MOVZ, "MOVZ", "z10100101ssiiiiiiiiiiiiiiiiddddd") +INST(MOVK, "MOVK", "z11100101ssiiiiiiiiiiiiiiiiddddd") + +// Data processing - Immediate - Bitfield +INST(SBFM, "SBFM", "z00100110Nrrrrrrssssssnnnnnddddd") +INST(BFM, "BFM", "z01100110Nrrrrrrssssssnnnnnddddd") +INST(UBFM, "UBFM", "z10100110Nrrrrrrssssssnnnnnddddd") +INST(ASR_1, "ASR (immediate, 32-bit)", "00010011000rrrrr011111nnnnnddddd") +INST(ASR_2, "ASR (immediate, 64-bit)", "1001001101rrrrrr111111nnnnnddddd") +INST(SXTB_1, "SXTB (32-bit)", "0001001100000000000111nnnnnddddd") +INST(SXTB_2, "SXTB (64-bit)", "1001001101000000000111nnnnnddddd") +INST(SXTH_1, "SXTH (32-bit)", "0001001100000000001111nnnnnddddd") +INST(SXTH_2, "SXTH (64-bit)", "1001001101000000001111nnnnnddddd") +INST(SXTW, "SXTW", "1001001101000000011111nnnnnddddd") + +// Data processing - Immediate - Extract +INST(EXTR, "EXTR", "z00100111N0mmmmmssssssnnnnnddddd") + +// Conditional branch +INST(B_cond, "B.cond", "01010100iiiiiiiiiiiiiiiiiii0cccc") + +// Exception generation +INST(SVC, "SVC", "11010100000iiiiiiiiiiiiiiii00001") +//INST(HVC, "HVC", "11010100000iiiiiiiiiiiiiiii00010") +//INST(SMC, "SMC", "11010100000iiiiiiiiiiiiiiii00011") +INST(BRK, "BRK", "11010100001iiiiiiiiiiiiiiii00000") +//INST(HLT, "HLT", "11010100010iiiiiiiiiiiiiiii00000") +//INST(DCPS1, "DCPS1", "11010100101iiiiiiiiiiiiiiii00001") +//INST(DCPS2, "DCPS2", "11010100101iiiiiiiiiiiiiiii00010") +//INST(DCPS3, "DCPS3", "11010100101iiiiiiiiiiiiiiii00011") + +// System +//INST(MSR_imm, "MSR (immediate)", "1101010100000ooo0100MMMMooo11111") +INST(HINT, "HINT", "11010101000000110010MMMMooo11111") +INST(NOP, "NOP", "11010101000000110010000000011111") +INST(YIELD, "YIELD", "11010101000000110010000000111111") +INST(WFE, "WFE", "11010101000000110010000001011111") +INST(WFI, "WFI", "11010101000000110010000001111111") +INST(SEV, "SEV", "11010101000000110010000010011111") +INST(SEVL, "SEVL", "11010101000000110010000010111111") +//INST(DGH, "DGH", "11010101000000110010000011011111") // v8.6 +//INST(WFET, "WFET", "110101010000001100010000000ddddd") // v8.7 +//INST(WFIT, "WFIT", "110101010000001100010000001ddddd") // v8.7 +//INST(XPAC_1, "XPACD, XPACI, XPACLRI", "110110101100000101000D11111ddddd") +//INST(XPAC_2, "XPACD, XPACI, XPACLRI", "11010101000000110010000011111111") +//INST(PACIA_1, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "110110101100000100Z000nnnnnddddd") +//INST(PACIA_2, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "1101010100000011001000-100-11111") +//INST(PACIB_1, "PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB", "110110101100000100Z001nnnnnddddd") +//INST(PACIB_2, "PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB", "1101010100000011001000-101-11111") +//INST(AUTIA_1, "AUTIA, AUTIA1716, AUTIASP, AUTIAZ, AUTIZA", "110110101100000100Z100nnnnnddddd") +//INST(AUTIA_2, "AUTIA, AUTIA1716, AUTIASP, AUTIAZ, AUTIZA", "1101010100000011001000-110-11111") +//INST(AUTIB_1, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "110110101100000100Z101nnnnnddddd") +//INST(AUTIB_2, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "1101010100000011001000-111-11111") +//INST(BTI, "BTI", "110101010000001100100100ii011111") // ARMv8.5 +//INST(ESB, "ESB", "11010101000000110010001000011111") +//INST(PSB, "PSB CSYNC", "11010101000000110010001000111111") +//INST(TSB, "TSB CSYNC", "11010101000000110010001001011111") // ARMv8.5 +//INST(CSDB, "CSDB", "11010101000000110010001010011111") +INST(CLREX, "CLREX", "11010101000000110011MMMM01011111") +INST(DSB, "DSB", "11010101000000110011MMMM10011111") +//INST(SSBB, "SSBB", "11010101000000110011000010011111") +//INST(PSSBB, "PSSBB", "11010101000000110011010010011111") +INST(DMB, "DMB", "11010101000000110011MMMM10111111") +INST(ISB, "ISB", "11010101000000110011MMMM11011111") +//INST(SB, "SB", "11010101000000110011000011111111") +//INST(SYS, "SYS", "1101010100001oooNNNNMMMMooottttt") +INST(MSR_reg, "MSR (register)", "110101010001poooNNNNMMMMooottttt") +//INST(SYSL, "SYSL", "1101010100101oooNNNNMMMMooottttt") +INST(MRS, "MRS", "110101010011poooNNNNMMMMooottttt") + +// System - Flag manipulation instructions +INST(CFINV, "CFINV", "11010101000000000100000000011111") // ARMv8.4 +INST(RMIF, "RMIF", "10111010000iiiiii00001nnnnn0IIII") // ARMv8.4 +//INST(SETF8, "SETF8", "0011101000000000000010nnnnn01101") // ARMv8.4 +//INST(SETF16, "SETF16", "0011101000000000010010nnnnn01101") // ARMv8.4 + +// System - Flag format instructions +INST(XAFlag, "XAFlag", "11010101000000000100000000111111") // ARMv8.5 +INST(AXFlag, "AXFlag", "11010101000000000100000001011111") // ARMv8.5 + +// SYS: Data Cache +INST(DC_IVAC, "DC IVAC", "110101010000100001110110001ttttt") +INST(DC_ISW, "DC ISW", "110101010000100001110110010ttttt") +INST(DC_CSW, "DC CSW", "110101010000100001111010010ttttt") +INST(DC_CISW, "DC CISW", "110101010000100001111110010ttttt") +INST(DC_ZVA, "DC ZVA", "110101010000101101110100001ttttt") +INST(DC_CVAC, "DC CVAC", "110101010000101101111010001ttttt") +INST(DC_CVAU, "DC CVAU", "110101010000101101111011001ttttt") +INST(DC_CVAP, "DC CVAP", "110101010000101101111100001ttttt") +INST(DC_CIVAC, "DC CIVAC", "110101010000101101111110001ttttt") + +// SYS: Instruction Cache +INST(IC_IALLU, "IC IALLU", "11010101000010000111010100011111") +INST(IC_IALLUIS, "IC IALLUIS", "11010101000010000111000100011111") +INST(IC_IVAU, "IC IVAU", "110101010000101101110101001ttttt") + +// Unconditional branch (Register) +INST(BLR, "BLR", "1101011000111111000000nnnnn00000") +INST(BR, "BR", "1101011000011111000000nnnnn00000") +//INST(DRPS, "DRPS", "11010110101111110000001111100000") +//INST(ERET, "ERET", "11010110100111110000001111100000") +INST(RET, "RET", "1101011001011111000000nnnnn00000") +//INST(BLRA, "BLRAA, BLRAAZ, BLRAB, BLRABZ", "1101011Z0011111100001Mnnnnnmmmmm") // ARMv8.3 +//INST(BRA, "BRAA, BRAAZ, BRAB, BRABZ", "1101011Z0001111100001Mnnnnnmmmmm") // ARMv8.3 +//INST(ERETA, "ERETAA, ERETAB", "110101101001111100001M1111111111") // ARMv8.3 +//INST(RETA, "RETAA, RETAB", "110101100101111100001M1111111111") // ARMv8.3 + +// Unconditional branch (immediate) +INST(B_uncond, "B", "000101iiiiiiiiiiiiiiiiiiiiiiiiii") +INST(BL, "BL", "100101iiiiiiiiiiiiiiiiiiiiiiiiii") + +// Compare and branch (immediate) +INST(CBZ, "CBZ", "z0110100iiiiiiiiiiiiiiiiiiittttt") +INST(CBNZ, "CBNZ", "z0110101iiiiiiiiiiiiiiiiiiittttt") +INST(TBZ, "TBZ", "b0110110bbbbbiiiiiiiiiiiiiittttt") +INST(TBNZ, "TBNZ", "b0110111bbbbbiiiiiiiiiiiiiittttt") + +// Loads and stores - Advanced SIMD Load/Store multiple structures +INST(STx_mult_1, "STx (multiple structures)", "0Q00110000000000oooozznnnnnttttt") +INST(STx_mult_2, "STx (multiple structures)", "0Q001100100mmmmmoooozznnnnnttttt") +INST(LDx_mult_1, "LDx (multiple structures)", "0Q00110001000000oooozznnnnnttttt") +INST(LDx_mult_2, "LDx (multiple structures)", "0Q001100110mmmmmoooozznnnnnttttt") + +// Loads and stores - Advanced SIMD Load/Store single structures +INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000oo0Szznnnnnttttt") +INST(ST1_sngl_2, "ST1 (single structure)", "0Q001101100mmmmmoo0Szznnnnnttttt") +INST(ST3_sngl_1, "ST3 (single structure)", "0Q00110100000000oo1Szznnnnnttttt") +INST(ST3_sngl_2, "ST3 (single structure)", "0Q001101100mmmmmoo1Szznnnnnttttt") +INST(ST2_sngl_1, "ST2 (single structure)", "0Q00110100100000oo0Szznnnnnttttt") +INST(ST2_sngl_2, "ST2 (single structure)", "0Q001101101mmmmmoo0Szznnnnnttttt") +INST(ST4_sngl_1, "ST4 (single structure)", "0Q00110100100000oo1Szznnnnnttttt") +INST(ST4_sngl_2, "ST4 (single structure)", "0Q001101101mmmmmoo1Szznnnnnttttt") +INST(LD1_sngl_1, "LD1 (single structure)", "0Q00110101000000oo0Szznnnnnttttt") +INST(LD1_sngl_2, "LD1 (single structure)", "0Q001101110mmmmmoo0Szznnnnnttttt") +INST(LD3_sngl_1, "LD3 (single structure)", "0Q00110101000000oo1Szznnnnnttttt") +INST(LD3_sngl_2, "LD3 (single structure)", "0Q001101110mmmmmoo1Szznnnnnttttt") +INST(LD1R_1, "LD1R", "0Q001101010000001100zznnnnnttttt") +INST(LD1R_2, "LD1R", "0Q001101110mmmmm1100zznnnnnttttt") +INST(LD3R_1, "LD3R", "0Q001101010000001110zznnnnnttttt") +INST(LD3R_2, "LD3R", "0Q001101110mmmmm1110zznnnnnttttt") +INST(LD2_sngl_1, "LD2 (single structure)", "0Q00110101100000oo0Szznnnnnttttt") +INST(LD2_sngl_2, "LD2 (single structure)", "0Q001101111mmmmmoo0Szznnnnnttttt") +INST(LD4_sngl_1, "LD4 (single structure)", "0Q00110101100000oo1Szznnnnnttttt") +INST(LD4_sngl_2, "LD4 (single structure)", "0Q001101111mmmmmoo1Szznnnnnttttt") +INST(LD2R_1, "LD2R", "0Q001101011000001100zznnnnnttttt") +INST(LD2R_2, "LD2R", "0Q001101111mmmmm1100zznnnnnttttt") +INST(LD4R_1, "LD4R", "0Q001101011000001110zznnnnnttttt") +INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt") + +// Loads and stores - Load/Store Exclusive +INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt") +INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt") +INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt") +INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt") +INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt") +INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt") +INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt") +INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt") +INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt") +INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt") +INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt") +INST(LDAR, "LDARB, LDARH, LDAR", "zz00100011011111111111nnnnnttttt") +//INST(CASP, "CASP, CASPA, CASPAL, CASPL", "0z0010000L1sssssp11111nnnnnttttt") // ARMv8.1 +//INST(CASB, "CASB, CASAB, CASALB, CASLB", "000010001L1sssssp11111nnnnnttttt") // ARMv8.1 +//INST(CASH, "CASH, CASAH, CASALH, CASLH", "010010001L1sssssp11111nnnnnttttt") // ARMv8.1 +//INST(CAS, "CAS, CASA, CASAL, CASL", "1z0010001L1sssssp11111nnnnnttttt") // ARMv8.1 + +// Loads and stores - Load register (literal) +INST(LDR_lit_gen, "LDR (literal)", "0z011000iiiiiiiiiiiiiiiiiiittttt") +INST(LDRSW_lit, "LDRSW (literal)", "10011000iiiiiiiiiiiiiiiiiiittttt") +INST(PRFM_lit, "PRFM (literal)", "11011000iiiiiiiiiiiiiiiiiiittttt") +INST(LDR_lit_fpsimd, "LDR (literal, SIMD&FP)", "oo011100iiiiiiiiiiiiiiiiiiittttt") + +// Loads and stores - Load/Store no-allocate pair +INST(STNP_LDNP_gen, "STNP/LDNP", "o01010000Liiiiiiiuuuuunnnnnttttt") +INST(STNP_LDNP_fpsimd, "STNP/LDNP (SIMD&FP)", "oo1011000Liiiiiiiuuuuunnnnnttttt") + +// Loads and stores - Load/Store register pair +INST(STP_LDP_gen, "STP/LDP", "oo10100pwLiiiiiiiuuuuunnnnnttttt") +INST(UnallocatedEncoding, "", "--1010000-----------------------") +INST(STP_LDP_fpsimd, "STP/LDP (SIMD&FP)", "oo10110pwLiiiiiiiuuuuunnnnnttttt") +INST(UnallocatedEncoding, "", "--1011000-----------------------") + +// Loads and stores - Load/Store register (unscaled immediate) +INST(STURx_LDURx, "STURx/LDURx", "zz111000oo0iiiiiiiii00nnnnnttttt") +INST(UnallocatedEncoding, "", "111110001-0---------00----------") +INST(UnallocatedEncoding, "", "10111000110---------00----------") +INST(PRFM_imm, "PRFM (immediate)", "1111100110iiiiiiiiiiiinnnnnttttt") +INST(PRFM_unscaled_imm, "PRFM (unscaled offset)", "11111000100iiiiiiiii00nnnnnttttt") +INST(STUR_fpsimd, "STUR (SIMD&FP)", "zz111100o00iiiiiiiii00nnnnnttttt") +INST(LDUR_fpsimd, "LDUR (SIMD&FP)", "zz111100o10iiiiiiiii00nnnnnttttt") + +// Loads and stores - Load/Store register (immediate pre/post-indexed) +INST(STRx_LDRx_imm_1, "STRx/LDRx (immediate)", "zz111000oo0iiiiiiiiip1nnnnnttttt") +INST(STRx_LDRx_imm_2, "STRx/LDRx (immediate)", "zz111001ooiiiiiiiiiiiinnnnnttttt") +INST(UnallocatedEncoding, "", "111110001-0----------1----------") +INST(UnallocatedEncoding, "", "10111000110----------1----------") +INST(UnallocatedEncoding, "", "1111100111----------------------") +INST(UnallocatedEncoding, "", "1011100111----------------------") +INST(STR_imm_fpsimd_1, "STR (immediate, SIMD&FP)", "zz111100o00iiiiiiiiip1nnnnnttttt") +INST(STR_imm_fpsimd_2, "STR (immediate, SIMD&FP)", "zz111101o0iiiiiiiiiiiinnnnnttttt") +INST(LDR_imm_fpsimd_1, "LDR (immediate, SIMD&FP)", "zz111100o10iiiiiiiiip1nnnnnttttt") +INST(LDR_imm_fpsimd_2, "LDR (immediate, SIMD&FP)", "zz111101o1iiiiiiiiiiiinnnnnttttt") +//INST(STGP_1, "STGP (post-index)", "0110100010iiiiiiimmmmmnnnnnttttt") // ARMv8.5 +//INST(STGP_2, "STGP (pre-index)", "0110100110iiiiiiimmmmmnnnnnttttt") // ARMv8.5 +//INST(STGP_3, "STGP (signed-offset)", "0110100100iiiiiiimmmmmnnnnnttttt") // ARMv8.5 + +// Loads and stores - Load/Store register (unprivileged) +INST(STTRB, "STTRB", "00111000000iiiiiiiii10nnnnnttttt") +INST(LDTRB, "LDTRB", "00111000010iiiiiiiii10nnnnnttttt") +INST(LDTRSB, "LDTRSB", "00111000oo0iiiiiiiii10nnnnnttttt") +INST(STTRH, "STTRH", "01111000000iiiiiiiii10nnnnnttttt") +INST(LDTRH, "LDTRH", "01111000010iiiiiiiii10nnnnnttttt") +INST(LDTRSH, "LDTRSH", "01111000oo0iiiiiiiii10nnnnnttttt") +INST(STTR, "STTR", "zz111000000iiiiiiiii10nnnnnttttt") +INST(LDTR, "LDTR", "zz111000010iiiiiiiii10nnnnnttttt") +INST(LDTRSW, "LDTRSW", "10111000100iiiiiiiii10nnnnnttttt") + +// Loads and stores - Atomic memory options +//INST(LDADDB, "LDADDB, LDADDAB, LDADDALB, LDADDLB", "00111000AR1sssss000000nnnnnttttt") +//INST(LDCLRB, "LDCLRB, LDCLRAB, LDCLRALB, LDCLRLB", "00111000AR1sssss000100nnnnnttttt") +//INST(LDEORB, "LDEORB, LDEORAB, LDEORALB, LDEORLB", "00111000AR1sssss001000nnnnnttttt") +//INST(LDSETB, "LDSETB, LDSETAB, LDSETALB, LDSETLB", "00111000AR1sssss001100nnnnnttttt") +//INST(LDSMAXB, "LDSMAXB, LDSMAXAB, LDSMAXALB, LDSMAXLB", "00111000AR1sssss010000nnnnnttttt") +//INST(LDSMINB, "LDSMINB, LDSMINAB, LDSMINALB, LDSMINLB", "00111000AR1sssss010100nnnnnttttt") +//INST(LDUMAXB, "LDUMAXB, LDUMAXAB, LDUMAXALB, LDUMAXLB", "00111000AR1sssss011000nnnnnttttt") +//INST(LDUMINB, "LDUMINB, LDUMINAB, LDUMINALB, LDUMINLB", "00111000AR1sssss011100nnnnnttttt") +//INST(SWPB, "SWPB, SWPAB, SWPALB, SWPLB", "00111000AR1sssss100000nnnnnttttt") +//INST(LDAPRB, "LDAPRB", "0011100010111111110000nnnnnttttt") +//INST(LDADDH, "LDADDH, LDADDAH, LDADDALH, LDADDLH", "01111000AR1sssss000000nnnnnttttt") +//INST(LDCLRH, "LDCLRH, LDCLRAH, LDCLRALH, LDCLRLH", "01111000AR1sssss000100nnnnnttttt") +//INST(LDEORH, "LDEORH, LDEORAH, LDEORALH, LDEORLH", "01111000AR1sssss001000nnnnnttttt") +//INST(LDSETH, "LDSETH, LDSETAH, LDSETALH, LDSETLH", "01111000AR1sssss001100nnnnnttttt") +//INST(LDSMAXH, "LDSMAXH, LDSMAXAH, LDSMAXALH, LDSMAXLH", "01111000AR1sssss010000nnnnnttttt") +//INST(LDSMINH, "LDSMINH, LDSMINAH, LDSMINALH, LDSMINLH", "01111000AR1sssss010100nnnnnttttt") +//INST(LDUMAXH, "LDUMAXH, LDUMAXAH, LDUMAXALH, LDUMAXLH", "01111000AR1sssss011000nnnnnttttt") +//INST(LDUMINH, "LDUMINH, LDUMINAH, LDUMINALH, LDUMINLH", "01111000AR1sssss011100nnnnnttttt") +//INST(SWPH, "SWPH, SWPAH, SWPALH, SWPLH", "01111000AR1sssss100000nnnnnttttt") +//INST(LDAPRH, "LDAPRH", "0111100010111111110000nnnnnttttt") +//INST(LDADD, "LDADD, LDADDA, LDADDAL, LDADDL", "1-111000AR1sssss000000nnnnnttttt") +//INST(LDCLR, "LDCLR, LDCLRA, LDCLRAL, LDCLRL", "1-111000AR1sssss000100nnnnnttttt") +//INST(LDEOR, "LDEOR, LDEORA, LDEORAL, LDEORL", "1-111000AR1sssss001000nnnnnttttt") +//INST(LDSET, "LDSET, LDSETA, LDSETAL, LDSETL", "1-111000AR1sssss001100nnnnnttttt") +//INST(LDSMAX, "LDSMAX, LDSMAXA, LDSMAXAL, LDSMAXL", "1-111000AR1sssss010000nnnnnttttt") +//INST(LDSMIN, "LDSMIN, LDSMINA, LDSMINAL, LDSMINL", "1-111000AR1sssss010100nnnnnttttt") +//INST(LDUMAX, "LDUMAX, LDUMAXA, LDUMAXAL, LDUMAXL", "1-111000AR1sssss011000nnnnnttttt") +//INST(LDUMIN, "LDUMIN, LDUMINA, LDUMINAL, LDUMINL", "1-111000AR1sssss011100nnnnnttttt") +//INST(SWP, "SWP, SWPA, SWPAL, SWPL", "1-111000AR1sssss100000nnnnnttttt") +//INST(LDAPR, "LDAPR", "1-11100010111111110000nnnnnttttt") +//INST(LD64B, "LD64B", "1111100000111111110100nnnnnttttt") // v8.7 +//INST(ST64B, "ST64B", "1111100000111111100100nnnnnttttt") // v8.7 +//INST(ST64BV, "ST64BV", "11111000001sssss101100nnnnnttttt") // v8.7 +//INST(ST64BV0, "ST64BV0", "11111000001sssss101000nnnnnttttt") // v8.7 + +// Loads and stores - Load/Store register (register offset) +INST(STRx_reg, "STRx (register)", "zz111000o01mmmmmxxxS10nnnnnttttt") +INST(LDRx_reg, "LDRx (register)", "zz111000o11mmmmmxxxS10nnnnnttttt") +INST(STR_reg_fpsimd, "STR (register, SIMD&FP)", "zz111100o01mmmmmxxxS10nnnnnttttt") +INST(LDR_reg_fpsimd, "LDR (register, SIMD&FP)", "zz111100o11mmmmmxxxS10nnnnnttttt") + +// Loads and stores - Load/Store memory tags +//INST(STG_1, "STG (post-index)", "11011001001iiiiiiiii01nnnnn11111") // ARMv8.5 +//INST(STG_2, "STG (pre-index)", "11011001001iiiiiiiii11nnnnn11111") // ARMv8.5 +//INST(STG_3, "STG (signed-offset)", "11011001001iiiiiiiii10nnnnn11111") // ARMv8.5 +//INST(LDG, "LDG", "11011001011iiiiiiiii00nnnnnttttt") // ARMv8.5 +//INST(STZG_1, "STZG (post-index)", "11011001011iiiiiiiii01nnnnn11111") // ARMv8.5 +//INST(STZG_2, "STZG (pre-index)", "11011001011iiiiiiiii11nnnnn11111") // ARMv8.5 +//INST(STZG_3, "STZG (signed-offset)", "11011001011iiiiiiiii10nnnnn11111") // ARMv8.5 +//INST(ST2G_1, "ST2G (post-index)", "11011001101iiiiiiiii01nnnnn11111") // ARMv8.5 +//INST(ST2G_2, "ST2G (pre-index)", "11011001101iiiiiiiii11nnnnn11111") // ARMv8.5 +//INST(ST2G_3, "ST2G (signed-offset)", "11011001101iiiiiiiii10nnnnn11111") // ARMv8.5 +//INST(STGV, "STGV", "1101100110100000000000nnnnnttttt") // ARMv8.5 +//INST(STZ2G_1, "STZ2G (post-index)", "11011001111iiiiiiiii01nnnnn11111") // ARMv8.5 +//INST(STZ2G_2, "STZ2G (pre-index)", "11011001111iiiiiiiii11nnnnn11111") // ARMv8.5 +//INST(STZ2G_3, "STZ2G (signed-offset)", "11011001111iiiiiiiii10nnnnn11111") // ARMv8.5 +//INST(LDGV, "LDGV", "1101100111100000000000nnnnnttttt") // ARMv8.5 + +// Loads and stores - Load/Store register (pointer authentication) +//INST(LDRA, "LDRAA, LDRAB", "11111000MS1iiiiiiiiiW1nnnnnttttt") + +// Data Processing - Register - 2 source +INST(UDIV, "UDIV", "z0011010110mmmmm000010nnnnnddddd") +INST(SDIV, "SDIV", "z0011010110mmmmm000011nnnnnddddd") +INST(LSLV, "LSLV", "z0011010110mmmmm001000nnnnnddddd") +INST(LSRV, "LSRV", "z0011010110mmmmm001001nnnnnddddd") +INST(ASRV, "ASRV", "z0011010110mmmmm001010nnnnnddddd") +INST(RORV, "RORV", "z0011010110mmmmm001011nnnnnddddd") +INST(CRC32, "CRC32B, CRC32H, CRC32W, CRC32X", "z0011010110mmmmm0100zznnnnnddddd") +INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd") +//INST(PACGA, "PACGA", "10011010110mmmmm001100nnnnnddddd") +//INST(SUBP, "SUBP", "10011010110mmmmm000000nnnnnddddd") // ARMv8.5 +//INST(IRG, "IRG", "10011010110mmmmm000100nnnnnddddd") // ARMv8.5 +//INST(GMI, "GMI", "10011010110mmmmm000101nnnnnddddd") // ARMv8.5 +//INST(SUBPS, "SUBPS", "10111010110mmmmm000000nnnnnddddd") // ARMv8.5 + +// Data Processing - Register - 1 source +INST(RBIT_int, "RBIT", "z101101011000000000000nnnnnddddd") +INST(REV16_int, "REV16", "z101101011000000000001nnnnnddddd") +INST(REV, "REV", "z10110101100000000001onnnnnddddd") +INST(CLZ_int, "CLZ", "z101101011000000000100nnnnnddddd") +INST(CLS_int, "CLS", "z101101011000000000101nnnnnddddd") +INST(REV32_int, "REV32", "1101101011000000000010nnnnnddddd") +//INST(PACDA, "PACDA, PACDZA", "110110101100000100Z010nnnnnddddd") +//INST(PACDB, "PACDB, PACDZB", "110110101100000100Z011nnnnnddddd") +//INST(AUTDA, "AUTDA, AUTDZA", "110110101100000100Z110nnnnnddddd") +//INST(AUTDB, "AUTDB, AUTDZB", "110110101100000100Z111nnnnnddddd") + +// Data Processing - Register - Logical (shifted register) +INST(AND_shift, "AND (shifted register)", "z0001010ss0mmmmmiiiiiinnnnnddddd") +INST(BIC_shift, "BIC (shifted register)", "z0001010ss1mmmmmiiiiiinnnnnddddd") +INST(ORR_shift, "ORR (shifted register)", "z0101010ss0mmmmmiiiiiinnnnnddddd") +INST(ORN_shift, "ORN (shifted register)", "z0101010ss1mmmmmiiiiiinnnnnddddd") +INST(EOR_shift, "EOR (shifted register)", "z1001010ss0mmmmmiiiiiinnnnnddddd") +INST(EON, "EON (shifted register)", "z1001010ss1mmmmmiiiiiinnnnnddddd") +INST(ANDS_shift, "ANDS (shifted register)", "z1101010ss0mmmmmiiiiiinnnnnddddd") +INST(BICS, "BICS (shifted register)", "z1101010ss1mmmmmiiiiiinnnnnddddd") + +// Data Processing - Register - Add/Sub (shifted register) +INST(ADD_shift, "ADD (shifted register)", "z0001011ss0mmmmmiiiiiinnnnnddddd") +INST(ADDS_shift, "ADDS (shifted register)", "z0101011ss0mmmmmiiiiiinnnnnddddd") +INST(SUB_shift, "SUB (shifted register)", "z1001011ss0mmmmmiiiiiinnnnnddddd") +INST(SUBS_shift, "SUBS (shifted register)", "z1101011ss0mmmmmiiiiiinnnnnddddd") + +// Data Processing - Register - Add/Sub (shifted register) +INST(ADD_ext, "ADD (extended register)", "z0001011001mmmmmxxxiiinnnnnddddd") +INST(ADDS_ext, "ADDS (extended register)", "z0101011001mmmmmxxxiiinnnnnddddd") +INST(SUB_ext, "SUB (extended register)", "z1001011001mmmmmxxxiiinnnnnddddd") +INST(SUBS_ext, "SUBS (extended register)", "z1101011001mmmmmxxxiiinnnnnddddd") + +// Data Processing - Register - Add/Sub (with carry) +INST(ADC, "ADC", "z0011010000mmmmm000000nnnnnddddd") +INST(ADCS, "ADCS", "z0111010000mmmmm000000nnnnnddddd") +INST(SBC, "SBC", "z1011010000mmmmm000000nnnnnddddd") +INST(SBCS, "SBCS", "z1111010000mmmmm000000nnnnnddddd") + +// Data Processing - Register - Conditional compare +INST(CCMN_reg, "CCMN (register)", "z0111010010mmmmmcccc00nnnnn0ffff") +INST(CCMP_reg, "CCMP (register)", "z1111010010mmmmmcccc00nnnnn0ffff") +INST(CCMN_imm, "CCMN (immediate)", "z0111010010iiiiicccc10nnnnn0ffff") +INST(CCMP_imm, "CCMP (immediate)", "z1111010010iiiiicccc10nnnnn0ffff") + +// Data Processing - Register - Conditional select +INST(CSEL, "CSEL", "z0011010100mmmmmcccc00nnnnnddddd") +INST(CSINC, "CSINC", "z0011010100mmmmmcccc01nnnnnddddd") +INST(CSINV, "CSINV", "z1011010100mmmmmcccc00nnnnnddddd") +INST(CSNEG, "CSNEG", "z1011010100mmmmmcccc01nnnnnddddd") + +// Data Processing - Register - 3 source +INST(MADD, "MADD", "z0011011000mmmmm0aaaaannnnnddddd") +INST(MSUB, "MSUB", "z0011011000mmmmm1aaaaannnnnddddd") +INST(SMADDL, "SMADDL", "10011011001mmmmm0aaaaannnnnddddd") +INST(SMSUBL, "SMSUBL", "10011011001mmmmm1aaaaannnnnddddd") +INST(SMULH, "SMULH", "10011011010mmmmm011111nnnnnddddd") +INST(UMADDL, "UMADDL", "10011011101mmmmm0aaaaannnnnddddd") +INST(UMSUBL, "UMSUBL", "10011011101mmmmm1aaaaannnnnddddd") +INST(UMULH, "UMULH", "10011011110mmmmm011111nnnnnddddd") + +// Data Processing - FP and SIMD - AES +INST(AESE, "AESE", "0100111000101000010010nnnnnddddd") +INST(AESD, "AESD", "0100111000101000010110nnnnnddddd") +INST(AESMC, "AESMC", "0100111000101000011010nnnnnddddd") +INST(AESIMC, "AESIMC", "0100111000101000011110nnnnnddddd") + +// Data Processing - FP and SIMD - SHA +INST(SHA1C, "SHA1C", "01011110000mmmmm000000nnnnnddddd") +INST(SHA1P, "SHA1P", "01011110000mmmmm000100nnnnnddddd") +INST(SHA1M, "SHA1M", "01011110000mmmmm001000nnnnnddddd") +INST(SHA1SU0, "SHA1SU0", "01011110000mmmmm001100nnnnnddddd") +INST(SHA256H, "SHA256H", "01011110000mmmmm010000nnnnnddddd") +INST(SHA256H2, "SHA256H2", "01011110000mmmmm010100nnnnnddddd") +INST(SHA256SU1, "SHA256SU1", "01011110000mmmmm011000nnnnnddddd") +INST(SHA1H, "SHA1H", "0101111000101000000010nnnnnddddd") +INST(SHA1SU1, "SHA1SU1", "0101111000101000000110nnnnnddddd") +INST(SHA256SU0, "SHA256SU0", "0101111000101000001010nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar copy +INST(DUP_elt_1, "DUP (element)", "01011110000iiiii000001nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar three +//INST(FMULX_vec_1, "FMULX", "01011110010mmmmm000111nnnnnddddd") +INST(FMULX_vec_2, "FMULX", "010111100z1mmmmm110111nnnnnddddd") +INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd") +INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd") +INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd") +INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd") +INST(FRSQRTS_1, "FRSQRTS", "01011110110mmmmm001111nnnnnddddd") +INST(FRSQRTS_2, "FRSQRTS", "010111101z1mmmmm111111nnnnnddddd") +//INST(FCMGE_reg_1, "FCMGE (register)", "01111110010mmmmm001001nnnnnddddd") +INST(FCMGE_reg_2, "FCMGE (register)", "011111100z1mmmmm111001nnnnnddddd") +//INST(FACGE_1, "FACGE", "01111110010mmmmm001011nnnnnddddd") +INST(FACGE_2, "FACGE", "011111100z1mmmmm111011nnnnnddddd") +//INST(FABD_1, "FABD", "01111110110mmmmm000101nnnnnddddd") +INST(FABD_2, "FABD", "011111101z1mmmmm110101nnnnnddddd") +//INST(FCMGT_reg_1, "FCMGT (register)", "01111110110mmmmm001001nnnnnddddd") +INST(FCMGT_reg_2, "FCMGT (register)", "011111101z1mmmmm111001nnnnnddddd") +//INST(FACGT_1, "FACGT", "01111110110mmmmm001011nnnnnddddd") +INST(FACGT_2, "FACGT", "011111101z1mmmmm111011nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar two register misc +//INST(FCVTNS_1, "FCVTNS (vector)", "0101111001111001101010nnnnnddddd") +INST(FCVTNS_2, "FCVTNS (vector)", "010111100z100001101010nnnnnddddd") +//INST(FCVTMS_1, "FCVTMS (vector)", "0101111001111001101110nnnnnddddd") +INST(FCVTMS_2, "FCVTMS (vector)", "010111100z100001101110nnnnnddddd") +//INST(FCVTAS_1, "FCVTAS (vector)", "0101111001111001110010nnnnnddddd") +INST(FCVTAS_2, "FCVTAS (vector)", "010111100z100001110010nnnnnddddd") +//INST(SCVTF_int_1, "SCVTF (vector, integer)", "0101111001111001110110nnnnnddddd") +INST(SCVTF_int_2, "SCVTF (vector, integer)", "010111100z100001110110nnnnnddddd") +//INST(FCMGT_zero_1, "FCMGT (zero)", "0101111011111000110010nnnnnddddd") +INST(FCMGT_zero_2, "FCMGT (zero)", "010111101z100000110010nnnnnddddd") +INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd") +INST(FCMEQ_zero_2, "FCMEQ (zero)", "010111101z100000110110nnnnnddddd") +//INST(FCMLT_1, "FCMLT (zero)", "0101111011111000111010nnnnnddddd") +INST(FCMLT_2, "FCMLT (zero)", "010111101z100000111010nnnnnddddd") +//INST(FCVTPS_1, "FCVTPS (vector)", "0101111011111001101010nnnnnddddd") +INST(FCVTPS_2, "FCVTPS (vector)", "010111101z100001101010nnnnnddddd") +//INST(FCVTZS_int_1, "FCVTZS (vector, integer)", "0101111011111001101110nnnnnddddd") +INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd") +INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") +INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") +INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") +INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") +//INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd") +INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") +//INST(FCVTMU_1, "FCVTMU (vector)", "0111111001111001101110nnnnnddddd") +INST(FCVTMU_2, "FCVTMU (vector)", "011111100z100001101110nnnnnddddd") +//INST(FCVTAU_1, "FCVTAU (vector)", "0111111001111001110010nnnnnddddd") +INST(FCVTAU_2, "FCVTAU (vector)", "011111100z100001110010nnnnnddddd") +//INST(UCVTF_int_1, "UCVTF (vector, integer)", "0111111001111001110110nnnnnddddd") +INST(UCVTF_int_2, "UCVTF (vector, integer)", "011111100z100001110110nnnnnddddd") +//INST(FCMGE_zero_1, "FCMGE (zero)", "0111111011111000110010nnnnnddddd") +INST(FCMGE_zero_2, "FCMGE (zero)", "011111101z100000110010nnnnnddddd") +//INST(FCMLE_1, "FCMLE (zero)", "0111111011111000110110nnnnnddddd") +INST(FCMLE_2, "FCMLE (zero)", "011111101z100000110110nnnnnddddd") +//INST(FCVTPU_1, "FCVTPU (vector)", "0111111011111001101010nnnnnddddd") +INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd") +//INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd") +INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd") +INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") +INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar three same extra +//INST(SQRDMLAH_vec_1, "SQRDMLAH (vector)", "01111110zz0mmmmm100001nnnnnddddd") +//INST(SQRDMLAH_vec_2, "SQRDMLAH (vector)", "0Q101110zz0mmmmm100001nnnnnddddd") +//INST(SQRDMLSH_vec_1, "SQRDMLSH (vector)", "01111110zz0mmmmm100011nnnnnddddd") +//INST(SQRDMLSH_vec_2, "SQRDMLSH (vector)", "0Q101110zz0mmmmm100011nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar two-register misc +INST(SUQADD_1, "SUQADD", "01011110zz100000001110nnnnnddddd") +INST(SQABS_1, "SQABS", "01011110zz100000011110nnnnnddddd") +INST(CMGT_zero_1, "CMGT (zero)", "01011110zz100000100010nnnnnddddd") +INST(CMEQ_zero_1, "CMEQ (zero)", "01011110zz100000100110nnnnnddddd") +INST(CMLT_1, "CMLT (zero)", "01011110zz100000101010nnnnnddddd") +INST(ABS_1, "ABS", "01011110zz100000101110nnnnnddddd") +INST(SQXTN_1, "SQXTN, SQXTN2", "01011110zz100001010010nnnnnddddd") +INST(USQADD_1, "USQADD", "01111110zz100000001110nnnnnddddd") +INST(SQNEG_1, "SQNEG", "01111110zz100000011110nnnnnddddd") +INST(CMGE_zero_1, "CMGE (zero)", "01111110zz100000100010nnnnnddddd") +INST(CMLE_1, "CMLE (zero)", "01111110zz100000100110nnnnnddddd") +INST(NEG_1, "NEG (vector)", "01111110zz100000101110nnnnnddddd") +INST(SQXTUN_1, "SQXTUN, SQXTUN2", "01111110zz100001001010nnnnnddddd") +INST(UQXTN_1, "UQXTN, UQXTN2", "01111110zz100001010010nnnnnddddd") +INST(FCVTXN_1, "FCVTXN, FCVTXN2", "011111100z100001011010nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar pairwise +INST(ADDP_pair, "ADDP (scalar)", "01011110zz110001101110nnnnnddddd") +//INST(FMAXNMP_pair_1, "FMAXNMP (scalar)", "0101111000110000110010nnnnnddddd") +INST(FMAXNMP_pair_2, "FMAXNMP (scalar)", "011111100z110000110010nnnnnddddd") +//INST(FADDP_pair_1, "FADDP (scalar)", "0101111000110000110110nnnnnddddd") +INST(FADDP_pair_2, "FADDP (scalar)", "011111100z110000110110nnnnnddddd") +//INST(FMAXP_pair_1, "FMAXP (scalar)", "0101111000110000111110nnnnnddddd") +INST(FMAXP_pair_2, "FMAXP (scalar)", "011111100z110000111110nnnnnddddd") +//INST(FMINNMP_pair_1, "FMINNMP (scalar)", "0101111010110000110010nnnnnddddd") +INST(FMINNMP_pair_2, "FMINNMP (scalar)", "011111101z110000110010nnnnnddddd") +//INST(FMINP_pair_1, "FMINP (scalar)", "0101111010110000111110nnnnnddddd") +INST(FMINP_pair_2, "FMINP (scalar)", "011111101z110000111110nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar three different +//INST(SQDMLAL_vec_1, "SQDMLAL, SQDMLAL2 (vector)", "01011110zz1mmmmm100100nnnnnddddd") +//INST(SQDMLSL_vec_1, "SQDMLSL, SQDMLSL2 (vector)", "01011110zz1mmmmm101100nnnnnddddd") +//INST(SQDMULL_vec_1, "SQDMULL, SQDMULL2 (vector)", "01011110zz1mmmmm110100nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar three same +INST(SQADD_1, "SQADD", "01011110zz1mmmmm000011nnnnnddddd") +INST(SQSUB_1, "SQSUB", "01011110zz1mmmmm001011nnnnnddddd") +INST(CMGT_reg_1, "CMGT (register)", "01011110zz1mmmmm001101nnnnnddddd") +INST(CMGE_reg_1, "CMGE (register)", "01011110zz1mmmmm001111nnnnnddddd") +INST(SSHL_1, "SSHL", "01011110zz1mmmmm010001nnnnnddddd") +INST(SQSHL_reg_1, "SQSHL (register)", "01011110zz1mmmmm010011nnnnnddddd") +INST(SRSHL_1, "SRSHL", "01011110zz1mmmmm010101nnnnnddddd") +//INST(SQRSHL_1, "SQRSHL", "01011110zz1mmmmm010111nnnnnddddd") +INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd") +INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd") +INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd") +INST(UQADD_1, "UQADD", "01111110zz1mmmmm000011nnnnnddddd") +INST(UQSUB_1, "UQSUB", "01111110zz1mmmmm001011nnnnnddddd") +INST(CMHI_1, "CMHI (register)", "01111110zz1mmmmm001101nnnnnddddd") +INST(CMHS_1, "CMHS (register)", "01111110zz1mmmmm001111nnnnnddddd") +INST(USHL_1, "USHL", "01111110zz1mmmmm010001nnnnnddddd") +INST(UQSHL_reg_1, "UQSHL (register)", "01111110zz1mmmmm010011nnnnnddddd") +INST(URSHL_1, "URSHL", "01111110zz1mmmmm010101nnnnnddddd") +//INST(UQRSHL_1, "UQRSHL", "01111110zz1mmmmm010111nnnnnddddd") +INST(SUB_1, "SUB (vector)", "01111110zz1mmmmm100001nnnnnddddd") +INST(CMEQ_reg_1, "CMEQ (register)", "01111110zz1mmmmm100011nnnnnddddd") +INST(SQRDMULH_vec_1, "SQRDMULH (vector)", "01111110zz1mmmmm101101nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar shift by immediate +INST(SSHR_1, "SSHR", "010111110IIIIiii000001nnnnnddddd") +INST(SSRA_1, "SSRA", "010111110IIIIiii000101nnnnnddddd") +INST(SRSHR_1, "SRSHR", "010111110IIIIiii001001nnnnnddddd") +INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd") +INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd") +INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd") +INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd") +//INST(SQRSHRN_1, "SQRSHRN, SQRSHRN2", "010111110IIIIiii100111nnnnnddddd") +INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd") +INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd") +INST(USHR_1, "USHR", "011111110IIIIiii000001nnnnnddddd") +INST(USRA_1, "USRA", "011111110IIIIiii000101nnnnnddddd") +INST(URSHR_1, "URSHR", "011111110IIIIiii001001nnnnnddddd") +INST(URSRA_1, "URSRA", "011111110IIIIiii001101nnnnnddddd") +INST(SRI_1, "SRI", "011111110IIIIiii010001nnnnnddddd") +INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd") +INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd") +INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd") +INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd") +//INST(SQRSHRUN_1, "SQRSHRUN, SQRSHRUN2", "011111110IIIIiii100011nnnnnddddd") +INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd") +//INST(UQRSHRN_1, "UQRSHRN, UQRSHRN2", "011111110IIIIiii100111nnnnnddddd") +INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd") +INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar x indexed element +//INST(SQDMLAL_elt_1, "SQDMLAL, SQDMLAL2 (by element)", "01011111zzLMmmmm0011H0nnnnnddddd") +//INST(SQDMLSL_elt_1, "SQDMLSL, SQDMLSL2 (by element)", "01011111zzLMmmmm0111H0nnnnnddddd") +INST(SQDMULL_elt_1, "SQDMULL, SQDMULL2 (by element)", "01011111zzLMmmmm1011H0nnnnnddddd") +INST(SQDMULH_elt_1, "SQDMULH (by element)", "01011111zzLMmmmm1100H0nnnnnddddd") +INST(SQRDMULH_elt_1, "SQRDMULH (by element)", "01011111zzLMmmmm1101H0nnnnnddddd") +INST(FMLA_elt_1, "FMLA (by element)", "0101111100LMmmmm0001H0nnnnnddddd") +INST(FMLA_elt_2, "FMLA (by element)", "010111111zLMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_1, "FMLS (by element)", "0101111100LMmmmm0101H0nnnnnddddd") +INST(FMLS_elt_2, "FMLS (by element)", "010111111zLMmmmm0101H0nnnnnddddd") +//INST(FMUL_elt_1, "FMUL (by element)", "0101111100LMmmmm1001H0nnnnnddddd") +INST(FMUL_elt_2, "FMUL (by element)", "010111111zLMmmmm1001H0nnnnnddddd") +//INST(SQRDMLAH_elt_1, "SQRDMLAH (by element)", "01111111zzLMmmmm1101H0nnnnnddddd") +//INST(SQRDMLSH_elt_1, "SQRDMLSH (by element)", "01111111zzLMmmmm1111H0nnnnnddddd") +//INST(FMULX_elt_1, "FMULX (by element)", "0111111100LMmmmm1001H0nnnnnddddd") +INST(FMULX_elt_2, "FMULX (by element)", "011111111zLMmmmm1001H0nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Table Lookup +INST(TBL, "TBL", "0Q001110000mmmmm0LL000nnnnnddddd") +INST(TBX, "TBX", "0Q001110000mmmmm0LL100nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Permute +INST(UZP1, "UZP1", "0Q001110zz0mmmmm000110nnnnnddddd") +INST(TRN1, "TRN1", "0Q001110zz0mmmmm001010nnnnnddddd") +INST(ZIP1, "ZIP1", "0Q001110zz0mmmmm001110nnnnnddddd") +INST(UZP2, "UZP2", "0Q001110zz0mmmmm010110nnnnnddddd") +INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd") +INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Extract +INST(EXT, "EXT", "0Q101110000mmmmm0iiii0nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Copy +INST(DUP_elt_2, "DUP (element)", "0Q001110000iiiii000001nnnnnddddd") +INST(DUP_gen, "DUP (general)", "0Q001110000iiiii000011nnnnnddddd") +INST(SMOV, "SMOV", "0Q001110000iiiii001011nnnnnddddd") +INST(UMOV, "UMOV", "0Q001110000iiiii001111nnnnnddddd") +INST(INS_gen, "INS (general)", "01001110000iiiii000111nnnnnddddd") +INST(INS_elt, "INS (element)", "01101110000iiiii0iiii1nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Three same +//INST(FMULX_vec_3, "FMULX", "0Q001110010mmmmm000111nnnnnddddd") +INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd") +INST(FRECPS_3, "FRECPS", "0Q001110010mmmmm001111nnnnnddddd") +INST(FRSQRTS_3, "FRSQRTS", "0Q001110110mmmmm001111nnnnnddddd") +//INST(FCMGE_reg_3, "FCMGE (register)", "0Q101110010mmmmm001001nnnnnddddd") +//INST(FACGE_3, "FACGE", "0Q101110010mmmmm001011nnnnnddddd") +//INST(FABD_3, "FABD", "0Q101110110mmmmm000101nnnnnddddd") +//INST(FCMGT_reg_3, "FCMGT (register)", "0Q101110110mmmmm001001nnnnnddddd") +//INST(FACGT_3, "FACGT", "0Q101110110mmmmm001011nnnnnddddd") +//INST(FMAXNM_1, "FMAXNM (vector)", "0Q001110010mmmmm000001nnnnnddddd") +INST(FMLA_vec_1, "FMLA (vector)", "0Q001110010mmmmm000011nnnnnddddd") +//INST(FADD_1, "FADD (vector)", "0Q001110010mmmmm000101nnnnnddddd") +//INST(FMAX_1, "FMAX (vector)", "0Q001110010mmmmm001101nnnnnddddd") +//INST(FMINNM_1, "FMINNM (vector)", "0Q001110110mmmmm000001nnnnnddddd") +INST(FMLS_vec_1, "FMLS (vector)", "0Q001110110mmmmm000011nnnnnddddd") +//INST(FSUB_1, "FSUB (vector)", "0Q001110110mmmmm000101nnnnnddddd") +//INST(FMIN_1, "FMIN (vector)", "0Q001110110mmmmm001101nnnnnddddd") +//INST(FMAXNMP_vec_1, "FMAXNMP (vector)", "0Q101110010mmmmm000001nnnnnddddd") +//INST(FADDP_vec_1, "FADDP (vector)", "0Q101110010mmmmm000101nnnnnddddd") +//INST(FMUL_vec_1, "FMUL (vector)", "0Q101110010mmmmm000111nnnnnddddd") +//INST(FMAXP_vec_1, "FMAXP (vector)", "0Q101110010mmmmm001101nnnnnddddd") +//INST(FDIV_1, "FDIV (vector)", "0Q101110010mmmmm001111nnnnnddddd") +//INST(FMINNMP_vec_1, "FMINNMP (vector)", "0Q101110110mmmmm000001nnnnnddddd") +//INST(FMINP_vec_1, "FMINP (vector)", "0Q101110110mmmmm001101nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Three same extra +//INST(SMMLA_vec, "SMMLA", "01001110100mmmmm101001nnnnnddddd") // v8.6 +//INST(UMMLA_vec, "UMMLA", "01101110100mmmmm101001nnnnnddddd") // v8.6 +//INST(USMMLA_vec, "USMMLA", "01001110100mmmmm101011nnnnnddddd") // v8.6 +//INST(SUDOT_element, "SUDOT (by element)", "0Q00111100LMmmmm1111H0nnnnnddddd") // v8.6 +//INST(USDOT_element, "USDOT (by_element)", "0Q00111110LMmmmm1111H0nnnnnddddd") // v8.6 +//INST(USDOT_vec, "USDOT (vector)", "0Q001110100mmmmm100111nnnnnddddd") // v8.6 +INST(SDOT_vec, "SDOT (vector)", "0Q001110zz0mmmmm100101nnnnnddddd") +INST(UDOT_vec, "UDOT (vector)", "0Q101110zz0mmmmm100101nnnnnddddd") +INST(FCMLA_vec, "FCMLA", "0Q101110zz0mmmmm110rr1nnnnnddddd") +INST(FCADD_vec, "FCADD", "0Q101110zz0mmmmm111r01nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Two-register misc +INST(REV64_asimd, "REV64", "0Q001110zz100000000010nnnnnddddd") +INST(REV16_asimd, "REV16 (vector)", "0Q001110zz100000000110nnnnnddddd") +INST(SADDLP, "SADDLP", "0Q001110zz100000001010nnnnnddddd") +INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd") +INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd") +INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd") +INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd") +INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd") +INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd") +INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd") +INST(CMLT_2, "CMLT (zero)", "0Q001110zz100000101010nnnnnddddd") +INST(ABS_2, "ABS", "0Q001110zz100000101110nnnnnddddd") +INST(XTN, "XTN, XTN2", "0Q001110zz100001001010nnnnnddddd") +INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd") +INST(FCVTN, "FCVTN, FCVTN2", "0Q0011100z100001011010nnnnnddddd") +INST(FCVTL, "FCVTL, FCVTL2", "0Q0011100z100001011110nnnnnddddd") +INST(FRINTN_1, "FRINTN (vector)", "0Q00111001111001100010nnnnnddddd") +INST(FRINTN_2, "FRINTN (vector)", "0Q0011100z100001100010nnnnnddddd") +INST(FRINTM_1, "FRINTM (vector)", "0Q00111001111001100110nnnnnddddd") +INST(FRINTM_2, "FRINTM (vector)", "0Q0011100z100001100110nnnnnddddd") +//INST(FCVTNS_3, "FCVTNS (vector)", "0Q00111001111001101010nnnnnddddd") +INST(FCVTNS_4, "FCVTNS (vector)", "0Q0011100z100001101010nnnnnddddd") +//INST(FCVTMS_3, "FCVTMS (vector)", "0Q00111001111001101110nnnnnddddd") +INST(FCVTMS_4, "FCVTMS (vector)", "0Q0011100z100001101110nnnnnddddd") +//INST(FCVTAS_3, "FCVTAS (vector)", "0Q00111001111001110010nnnnnddddd") +INST(FCVTAS_4, "FCVTAS (vector)", "0Q0011100z100001110010nnnnnddddd") +//INST(SCVTF_int_3, "SCVTF (vector, integer)", "0Q00111001111001110110nnnnnddddd") +INST(SCVTF_int_4, "SCVTF (vector, integer)", "0Q0011100z100001110110nnnnnddddd") +//INST(FCMGT_zero_3, "FCMGT (zero)", "0Q00111011111000110010nnnnnddddd") +INST(FCMGT_zero_4, "FCMGT (zero)", "0Q0011101z100000110010nnnnnddddd") +INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd") +INST(FCMEQ_zero_4, "FCMEQ (zero)", "0Q0011101z100000110110nnnnnddddd") +//INST(FCMLT_3, "FCMLT (zero)", "0Q00111011111000111010nnnnnddddd") +INST(FCMLT_4, "FCMLT (zero)", "0Q0011101z100000111010nnnnnddddd") +INST(FABS_1, "FABS (vector)", "0Q00111011111000111110nnnnnddddd") +INST(FABS_2, "FABS (vector)", "0Q0011101z100000111110nnnnnddddd") +INST(FRINTP_1, "FRINTP (vector)", "0Q00111011111001100010nnnnnddddd") +INST(FRINTP_2, "FRINTP (vector)", "0Q0011101z100001100010nnnnnddddd") +INST(FRINTZ_1, "FRINTZ (vector)", "0Q00111011111001100110nnnnnddddd") +INST(FRINTZ_2, "FRINTZ (vector)", "0Q0011101z100001100110nnnnnddddd") +//INST(FCVTPS_3, "FCVTPS (vector)", "0Q00111011111001101010nnnnnddddd") +INST(FCVTPS_4, "FCVTPS (vector)", "0Q0011101z100001101010nnnnnddddd") +//INST(FCVTZS_int_3, "FCVTZS (vector, integer)", "0Q00111011111001101110nnnnnddddd") +INST(FCVTZS_int_4, "FCVTZS (vector, integer)", "0Q0011101z100001101110nnnnnddddd") +INST(URECPE, "URECPE", "0Q0011101z100001110010nnnnnddddd") +INST(FRECPE_3, "FRECPE", "0Q00111011111001110110nnnnnddddd") +INST(FRECPE_4, "FRECPE", "0Q0011101z100001110110nnnnnddddd") +INST(REV32_asimd, "REV32 (vector)", "0Q101110zz100000000010nnnnnddddd") +INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd") +INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd") +INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd") +INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd") +INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd") +INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd") +INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd") +INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd") +INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd") +INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd") +INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd") +INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd") +INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd") +INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd") +INST(FRINTX_1, "FRINTX (vector)", "0Q10111001111001100110nnnnnddddd") +INST(FRINTX_2, "FRINTX (vector)", "0Q1011100z100001100110nnnnnddddd") +//INST(FCVTNU_3, "FCVTNU (vector)", "0Q10111001111001101010nnnnnddddd") +INST(FCVTNU_4, "FCVTNU (vector)", "0Q1011100z100001101010nnnnnddddd") +//INST(FCVTMU_3, "FCVTMU (vector)", "0Q10111001111001101110nnnnnddddd") +INST(FCVTMU_4, "FCVTMU (vector)", "0Q1011100z100001101110nnnnnddddd") +//INST(FCVTAU_3, "FCVTAU (vector)", "0Q10111001111001110010nnnnnddddd") +INST(FCVTAU_4, "FCVTAU (vector)", "0Q1011100z100001110010nnnnnddddd") +//INST(UCVTF_int_3, "UCVTF (vector, integer)", "0Q10111001111001110110nnnnnddddd") +INST(UCVTF_int_4, "UCVTF (vector, integer)", "0Q1011100z100001110110nnnnnddddd") +INST(NOT, "NOT", "0Q10111000100000010110nnnnnddddd") +INST(RBIT_asimd, "RBIT (vector)", "0Q10111001100000010110nnnnnddddd") +INST(FNEG_1, "FNEG (vector)", "0Q10111011111000111110nnnnnddddd") +INST(FNEG_2, "FNEG (vector)", "0Q1011101z100000111110nnnnnddddd") +INST(FRINTI_1, "FRINTI (vector)", "0Q10111011111001100110nnnnnddddd") +INST(FRINTI_2, "FRINTI (vector)", "0Q1011101z100001100110nnnnnddddd") +//INST(FCMGE_zero_3, "FCMGE (zero)", "0Q10111011111000110010nnnnnddddd") +INST(FCMGE_zero_4, "FCMGE (zero)", "0Q1011101z100000110010nnnnnddddd") +//INST(FCMLE_3, "FCMLE (zero)", "0Q10111011111000110110nnnnnddddd") +INST(FCMLE_4, "FCMLE (zero)", "0Q1011101z100000110110nnnnnddddd") +//INST(FCVTPU_3, "FCVTPU (vector)", "0Q10111011111001101010nnnnnddddd") +INST(FCVTPU_4, "FCVTPU (vector)", "0Q1011101z100001101010nnnnnddddd") +//INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd") +INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") +INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") +INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") +INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") +//INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd") +INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") +//INST(FRINT32X_1, "FRINT32X (vector)", "0Q1011100z100001111110nnnnnddddd") // ARMv8.5 +//INST(FRINT64X_1, "FRINT64X (vector)", "0Q1011100z100001111010nnnnnddddd") // ARMv8.5 +//INST(FRINT32Z_1, "FRINT32Z (vector)", "0Q0011100z100001111010nnnnnddddd") // ARMv8.5 +//INST(FRINT64Z_1, "FRINT64Z (vector)", "0Q0011100z100001111110nnnnnddddd") // ARMv8.5 + +// Data Processing - FP and SIMD - SIMD across lanes +INST(SADDLV, "SADDLV", "0Q001110zz110000001110nnnnnddddd") +INST(SMAXV, "SMAXV", "0Q001110zz110000101010nnnnnddddd") +INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") +INST(ADDV, "ADDV", "0Q001110zz110001101110nnnnnddddd") +//INST(FMAXNMV_1, "FMAXNMV", "0Q00111000110000110010nnnnnddddd") +INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") +//INST(FMAXV_1, "FMAXV", "0Q00111000110000111110nnnnnddddd") +INST(FMAXV_2, "FMAXV", "0Q1011100z110000111110nnnnnddddd") +//INST(FMINNMV_1, "FMINNMV", "0Q00111010110000110010nnnnnddddd") +INST(FMINNMV_2, "FMINNMV", "0Q1011101z110000110010nnnnnddddd") +//INST(FMINV_1, "FMINV", "0Q00111010110000111110nnnnnddddd") +INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") +INST(UADDLV, "UADDLV", "0Q101110zz110000001110nnnnnddddd") +INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") +INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD three different +INST(SADDL, "SADDL, SADDL2", "0Q001110zz1mmmmm000000nnnnnddddd") +INST(SADDW, "SADDW, SADDW2", "0Q001110zz1mmmmm000100nnnnnddddd") +INST(SSUBL, "SSUBL, SSUBL2", "0Q001110zz1mmmmm001000nnnnnddddd") +INST(SSUBW, "SSUBW, SSUBW2", "0Q001110zz1mmmmm001100nnnnnddddd") +INST(ADDHN, "ADDHN, ADDHN2", "0Q001110zz1mmmmm010000nnnnnddddd") +INST(SABAL, "SABAL, SABAL2", "0Q001110zz1mmmmm010100nnnnnddddd") +INST(SUBHN, "SUBHN, SUBHN2", "0Q001110zz1mmmmm011000nnnnnddddd") +INST(SABDL, "SABDL, SABDL2", "0Q001110zz1mmmmm011100nnnnnddddd") +INST(SMLAL_vec, "SMLAL, SMLAL2 (vector)", "0Q001110zz1mmmmm100000nnnnnddddd") +INST(SMLSL_vec, "SMLSL, SMLSL2 (vector)", "0Q001110zz1mmmmm101000nnnnnddddd") +INST(SMULL_vec, "SMULL, SMULL2 (vector)", "0Q001110zz1mmmmm110000nnnnnddddd") +INST(PMULL, "PMULL, PMULL2", "0Q001110zz1mmmmm111000nnnnnddddd") +INST(UADDL, "UADDL, UADDL2", "0Q101110zz1mmmmm000000nnnnnddddd") +INST(UADDW, "UADDW, UADDW2", "0Q101110zz1mmmmm000100nnnnnddddd") +INST(USUBL, "USUBL, USUBL2", "0Q101110zz1mmmmm001000nnnnnddddd") +INST(USUBW, "USUBW, USUBW2", "0Q101110zz1mmmmm001100nnnnnddddd") +INST(RADDHN, "RADDHN, RADDHN2", "0Q101110zz1mmmmm010000nnnnnddddd") +INST(UABAL, "UABAL, UABAL2", "0Q101110zz1mmmmm010100nnnnnddddd") +INST(RSUBHN, "RSUBHN, RSUBHN2", "0Q101110zz1mmmmm011000nnnnnddddd") +INST(UABDL, "UABDL, UABDL2", "0Q101110zz1mmmmm011100nnnnnddddd") +INST(UMLAL_vec, "UMLAL, UMLAL2 (vector)", "0Q101110zz1mmmmm100000nnnnnddddd") +INST(UMLSL_vec, "UMLSL, UMLSL2 (vector)", "0Q101110zz1mmmmm101000nnnnnddddd") +INST(UMULL_vec, "UMULL, UMULL2 (vector)", "0Q101110zz1mmmmm110000nnnnnddddd") +//INST(SQDMLAL_vec_2, "SQDMLAL, SQDMLAL2 (vector)", "0Q001110zz1mmmmm100100nnnnnddddd") +//INST(SQDMLSL_vec_2, "SQDMLSL, SQDMLSL2 (vector)", "0Q001110zz1mmmmm101100nnnnnddddd") +INST(SQDMULL_vec_2, "SQDMULL, SQDMULL2 (vector)", "0Q001110zz1mmmmm110100nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD three same +INST(SHADD, "SHADD", "0Q001110zz1mmmmm000001nnnnnddddd") +INST(SQADD_2, "SQADD", "0Q001110zz1mmmmm000011nnnnnddddd") +INST(SRHADD, "SRHADD", "0Q001110zz1mmmmm000101nnnnnddddd") +INST(SHSUB, "SHSUB", "0Q001110zz1mmmmm001001nnnnnddddd") +INST(SQSUB_2, "SQSUB", "0Q001110zz1mmmmm001011nnnnnddddd") +INST(CMGT_reg_2, "CMGT (register)", "0Q001110zz1mmmmm001101nnnnnddddd") +INST(CMGE_reg_2, "CMGE (register)", "0Q001110zz1mmmmm001111nnnnnddddd") +INST(SSHL_2, "SSHL", "0Q001110zz1mmmmm010001nnnnnddddd") +INST(SQSHL_reg_2, "SQSHL (register)", "0Q001110zz1mmmmm010011nnnnnddddd") +INST(SRSHL_2, "SRSHL", "0Q001110zz1mmmmm010101nnnnnddddd") +//INST(SQRSHL_2, "SQRSHL", "0Q001110zz1mmmmm010111nnnnnddddd") +INST(SMAX, "SMAX", "0Q001110zz1mmmmm011001nnnnnddddd") +INST(SMIN, "SMIN", "0Q001110zz1mmmmm011011nnnnnddddd") +INST(SABD, "SABD", "0Q001110zz1mmmmm011101nnnnnddddd") +INST(SABA, "SABA", "0Q001110zz1mmmmm011111nnnnnddddd") +INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd") +INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd") +INST(MLA_vec, "MLA (vector)", "0Q001110zz1mmmmm100101nnnnnddddd") +INST(MUL_vec, "MUL (vector)", "0Q001110zz1mmmmm100111nnnnnddddd") +INST(SMAXP, "SMAXP", "0Q001110zz1mmmmm101001nnnnnddddd") +INST(SMINP, "SMINP", "0Q001110zz1mmmmm101011nnnnnddddd") +INST(SQDMULH_vec_2, "SQDMULH (vector)", "0Q001110zz1mmmmm101101nnnnnddddd") +INST(ADDP_vec, "ADDP (vector)", "0Q001110zz1mmmmm101111nnnnnddddd") +INST(FMAXNM_2, "FMAXNM (vector)", "0Q0011100z1mmmmm110001nnnnnddddd") +INST(FMLA_vec_2, "FMLA (vector)", "0Q0011100z1mmmmm110011nnnnnddddd") +INST(FADD_2, "FADD (vector)", "0Q0011100z1mmmmm110101nnnnnddddd") +INST(FMAX_2, "FMAX (vector)", "0Q0011100z1mmmmm111101nnnnnddddd") +INST(FMULX_vec_4, "FMULX", "0Q0011100z1mmmmm110111nnnnnddddd") +INST(FCMEQ_reg_4, "FCMEQ (register)", "0Q0011100z1mmmmm111001nnnnnddddd") +//INST(FMLAL_vec_1, "FMLAL, FMLAL2 (vector)", "0Q0011100z1mmmmm111011nnnnnddddd") +INST(FRECPS_4, "FRECPS", "0Q0011100z1mmmmm111111nnnnnddddd") +INST(AND_asimd, "AND (vector)", "0Q001110001mmmmm000111nnnnnddddd") +INST(BIC_asimd_reg, "BIC (vector, register)", "0Q001110011mmmmm000111nnnnnddddd") +INST(FMINNM_2, "FMINNM (vector)", "0Q0011101z1mmmmm110001nnnnnddddd") +INST(FMLS_vec_2, "FMLS (vector)", "0Q0011101z1mmmmm110011nnnnnddddd") +INST(FSUB_2, "FSUB (vector)", "0Q0011101z1mmmmm110101nnnnnddddd") +//INST(FMLSL_vec_1, "FMLSL, FMLSL2 (vector)", "0Q0011101z1mmmmm111011nnnnnddddd") +INST(FMIN_2, "FMIN (vector)", "0Q0011101z1mmmmm111101nnnnnddddd") +INST(FRSQRTS_4, "FRSQRTS", "0Q0011101z1mmmmm111111nnnnnddddd") +INST(ORR_asimd_reg, "ORR (vector, register)", "0Q001110101mmmmm000111nnnnnddddd") +INST(ORN_asimd, "ORN (vector)", "0Q001110111mmmmm000111nnnnnddddd") +INST(UHADD, "UHADD", "0Q101110zz1mmmmm000001nnnnnddddd") +INST(UQADD_2, "UQADD", "0Q101110zz1mmmmm000011nnnnnddddd") +INST(URHADD, "URHADD", "0Q101110zz1mmmmm000101nnnnnddddd") +INST(UHSUB, "UHSUB", "0Q101110zz1mmmmm001001nnnnnddddd") +INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd") +INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd") +INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd") +INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd") +INST(UQSHL_reg_2, "UQSHL (register)", "0Q101110zz1mmmmm010011nnnnnddddd") +INST(URSHL_2, "URSHL", "0Q101110zz1mmmmm010101nnnnnddddd") +//INST(UQRSHL_2, "UQRSHL", "0Q101110zz1mmmmm010111nnnnnddddd") +INST(UMAX, "UMAX", "0Q101110zz1mmmmm011001nnnnnddddd") +INST(UMIN, "UMIN", "0Q101110zz1mmmmm011011nnnnnddddd") +INST(UABD, "UABD", "0Q101110zz1mmmmm011101nnnnnddddd") +INST(UABA, "UABA", "0Q101110zz1mmmmm011111nnnnnddddd") +INST(SUB_2, "SUB (vector)", "0Q101110zz1mmmmm100001nnnnnddddd") +INST(CMEQ_reg_2, "CMEQ (register)", "0Q101110zz1mmmmm100011nnnnnddddd") +INST(MLS_vec, "MLS (vector)", "0Q101110zz1mmmmm100101nnnnnddddd") +INST(PMUL, "PMUL", "0Q101110zz1mmmmm100111nnnnnddddd") +INST(UMAXP, "UMAXP", "0Q101110zz1mmmmm101001nnnnnddddd") +INST(UMINP, "UMINP", "0Q101110zz1mmmmm101011nnnnnddddd") +INST(SQRDMULH_vec_2, "SQRDMULH (vector)", "0Q101110zz1mmmmm101101nnnnnddddd") +INST(FMAXNMP_vec_2, "FMAXNMP (vector)", "0Q1011100z1mmmmm110001nnnnnddddd") +//INST(FMLAL_vec_2, "FMLAL, FMLAL2 (vector)", "0Q1011100z1mmmmm110011nnnnnddddd") +INST(FADDP_vec_2, "FADDP (vector)", "0Q1011100z1mmmmm110101nnnnnddddd") +INST(FMUL_vec_2, "FMUL (vector)", "0Q1011100z1mmmmm110111nnnnnddddd") +INST(FCMGE_reg_4, "FCMGE (register)", "0Q1011100z1mmmmm111001nnnnnddddd") +INST(FACGE_4, "FACGE", "0Q1011100z1mmmmm111011nnnnnddddd") +INST(FMAXP_vec_2, "FMAXP (vector)", "0Q1011100z1mmmmm111101nnnnnddddd") +INST(FDIV_2, "FDIV (vector)", "0Q1011100z1mmmmm111111nnnnnddddd") +INST(EOR_asimd, "EOR (vector)", "0Q101110001mmmmm000111nnnnnddddd") +INST(BSL, "BSL", "0Q101110011mmmmm000111nnnnnddddd") +INST(FMINNMP_vec_2, "FMINNMP (vector)", "0Q1011101z1mmmmm110001nnnnnddddd") +//INST(FMLSL_vec_2, "FMLSL, FMLSL2 (vector)", "0Q1011101z1mmmmm110011nnnnnddddd") +INST(FABD_4, "FABD", "0Q1011101z1mmmmm110101nnnnnddddd") +INST(FCMGT_reg_4, "FCMGT (register)", "0Q1011101z1mmmmm111001nnnnnddddd") +INST(FACGT_4, "FACGT", "0Q1011101z1mmmmm111011nnnnnddddd") +INST(FMINP_vec_2, "FMINP (vector)", "0Q1011101z1mmmmm111101nnnnnddddd") +INST(BIT, "BIT", "0Q101110101mmmmm000111nnnnnddddd") +INST(BIF, "BIF", "0Q101110111mmmmm000111nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD modified immediate +INST(MOVI, "MOVI, MVNI, ORR, BIC (vector, immediate)", "0Qo0111100000abcmmmm01defghddddd") +INST(FMOV_2, "FMOV (vector, immediate)", "0Qo0111100000abc111101defghddddd") +INST(FMOV_3, "FMOV (vector, immediate)", "0Q00111100000abc111111defghddddd") +INST(UnallocatedEncoding, "Unallocated SIMD modified immediate", "0--0111100000-------11----------") + +// Data Processing - FP and SIMD - SIMD Shift by immediate +INST(SSHR_2, "SSHR", "0Q0011110IIIIiii000001nnnnnddddd") +INST(SSRA_2, "SSRA", "0Q0011110IIIIiii000101nnnnnddddd") +INST(SRSHR_2, "SRSHR", "0Q0011110IIIIiii001001nnnnnddddd") +INST(SRSRA_2, "SRSRA", "0Q0011110IIIIiii001101nnnnnddddd") +INST(SHL_2, "SHL", "0Q0011110IIIIiii010101nnnnnddddd") +INST(SQSHL_imm_2, "SQSHL (immediate)", "0Q0011110IIIIiii011101nnnnnddddd") +INST(SHRN, "SHRN, SHRN2", "0Q0011110IIIIiii100001nnnnnddddd") +INST(RSHRN, "RSHRN, RSHRN2", "0Q0011110IIIIiii100011nnnnnddddd") +INST(SQSHRN_2, "SQSHRN, SQSHRN2", "0Q0011110IIIIiii100101nnnnnddddd") +INST(SQRSHRN_2, "SQRSHRN, SQRSHRN2", "0Q0011110IIIIiii100111nnnnnddddd") +INST(SSHLL, "SSHLL, SSHLL2", "0Q0011110IIIIiii101001nnnnnddddd") +INST(SCVTF_fix_2, "SCVTF (vector, fixed-point)", "0Q0011110IIIIiii111001nnnnnddddd") +INST(FCVTZS_fix_2, "FCVTZS (vector, fixed-point)", "0Q0011110IIIIiii111111nnnnnddddd") +INST(USHR_2, "USHR", "0Q1011110IIIIiii000001nnnnnddddd") +INST(USRA_2, "USRA", "0Q1011110IIIIiii000101nnnnnddddd") +INST(URSHR_2, "URSHR", "0Q1011110IIIIiii001001nnnnnddddd") +INST(URSRA_2, "URSRA", "0Q1011110IIIIiii001101nnnnnddddd") +INST(SRI_2, "SRI", "0Q1011110IIIIiii010001nnnnnddddd") +INST(SLI_2, "SLI", "0Q1011110IIIIiii010101nnnnnddddd") +INST(SQSHLU_2, "SQSHLU", "0Q1011110IIIIiii011001nnnnnddddd") +INST(UQSHL_imm_2, "UQSHL (immediate)", "0Q1011110IIIIiii011101nnnnnddddd") +INST(SQSHRUN_2, "SQSHRUN, SQSHRUN2", "0Q1011110IIIIiii100001nnnnnddddd") +INST(SQRSHRUN_2, "SQRSHRUN, SQRSHRUN2", "0Q1011110IIIIiii100011nnnnnddddd") +INST(UQSHRN_2, "UQSHRN, UQSHRN2", "0Q1011110IIIIiii100101nnnnnddddd") +INST(UQRSHRN_2, "UQRSHRN, UQRSHRN2", "0Q1011110IIIIiii100111nnnnnddddd") +INST(USHLL, "USHLL, USHLL2", "0Q1011110IIIIiii101001nnnnnddddd") +INST(UCVTF_fix_2, "UCVTF (vector, fixed-point)", "0Q1011110IIIIiii111001nnnnnddddd") +INST(FCVTZU_fix_2, "FCVTZU (vector, fixed-point)", "0Q1011110IIIIiii111111nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD vector x indexed element +INST(SMLAL_elt, "SMLAL, SMLAL2 (by element)", "0Q001111zzLMmmmm0010H0nnnnnddddd") +//INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd") +INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") +//INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd") +INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") +INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd") +INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd") +INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd") +INST(SQRDMULH_elt_2, "SQRDMULH (by element)", "0Q001111zzLMmmmm1101H0nnnnnddddd") +INST(SDOT_elt, "SDOT (by element)", "0Q001111zzLMmmmm1110H0nnnnnddddd") +INST(FMLA_elt_3, "FMLA (by element)", "0Q00111100LMmmmm0001H0nnnnnddddd") +INST(FMLA_elt_4, "FMLA (by element)", "0Q0011111zLMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_3, "FMLS (by element)", "0Q00111100LMmmmm0101H0nnnnnddddd") +INST(FMLS_elt_4, "FMLS (by element)", "0Q0011111zLMmmmm0101H0nnnnnddddd") +//INST(FMUL_elt_3, "FMUL (by element)", "0Q00111100LMmmmm1001H0nnnnnddddd") +INST(FMUL_elt_4, "FMUL (by element)", "0Q0011111zLMmmmm1001H0nnnnnddddd") +//INST(FMLAL_elt_1, "FMLAL, FMLAL2 (by element)", "0Q0011111zLMmmmm0000H0nnnnnddddd") +//INST(FMLAL_elt_2, "FMLAL, FMLAL2 (by element)", "0Q1011111zLMmmmm1000H0nnnnnddddd") +//INST(FMLSL_elt_1, "FMLSL, FMLSL2 (by element)", "0Q0011111zLMmmmm0100H0nnnnnddddd") +//INST(FMLSL_elt_2, "FMLSL, FMLSL2 (by element)", "0Q1011111zLMmmmm1100H0nnnnnddddd") +INST(MLA_elt, "MLA (by element)", "0Q101111zzLMmmmm0000H0nnnnnddddd") +INST(UMLAL_elt, "UMLAL, UMLAL2 (by element)", "0Q101111zzLMmmmm0010H0nnnnnddddd") +INST(MLS_elt, "MLS (by element)", "0Q101111zzLMmmmm0100H0nnnnnddddd") +INST(UMLSL_elt, "UMLSL, UMLSL2 (by element)", "0Q101111zzLMmmmm0110H0nnnnnddddd") +INST(UMULL_elt, "UMULL, UMULL2 (by element)", "0Q101111zzLMmmmm1010H0nnnnnddddd") +//INST(SQRDMLAH_elt_2, "SQRDMLAH (by element)", "0Q101111zzLMmmmm1101H0nnnnnddddd") +INST(UDOT_elt, "UDOT (by element)", "0Q101111zzLMmmmm1110H0nnnnnddddd") +//INST(SQRDMLSH_elt_2, "SQRDMLSH (by element)", "0Q101111zzLMmmmm1111H0nnnnnddddd") +//INST(FMULX_elt_3, "FMULX (by element)", "0Q10111100LMmmmm1001H0nnnnnddddd") +INST(FMULX_elt_4, "FMULX (by element)", "0Q1011111zLMmmmm1001H0nnnnnddddd") +INST(FCMLA_elt, "FCMLA (by element)", "0Q101111zzLMmmmm0rr1H0nnnnnddddd") + +// Data Processing - FP and SIMD - Cryptographic three register +INST(SM3TT1A, "SM3TT1A", "11001110010mmmmm10ii00nnnnnddddd") +INST(SM3TT1B, "SM3TT1B", "11001110010mmmmm10ii01nnnnnddddd") +INST(SM3TT2A, "SM3TT2A", "11001110010mmmmm10ii10nnnnnddddd") +INST(SM3TT2B, "SM3TT2B", "11001110010mmmmm10ii11nnnnnddddd") + +// Data Processing - FP and SIMD - SHA512 three register +INST(SHA512H, "SHA512H", "11001110011mmmmm100000nnnnnddddd") +INST(SHA512H2, "SHA512H2", "11001110011mmmmm100001nnnnnddddd") +INST(SHA512SU1, "SHA512SU1", "11001110011mmmmm100010nnnnnddddd") +INST(RAX1, "RAX1", "11001110011mmmmm100011nnnnnddddd") +INST(SM3PARTW1, "SM3PARTW1", "11001110011mmmmm110000nnnnnddddd") +INST(SM3PARTW2, "SM3PARTW2", "11001110011mmmmm110001nnnnnddddd") +INST(SM4EKEY, "SM4EKEY", "11001110011mmmmm110010nnnnnddddd") +INST(XAR, "XAR", "11001110100mmmmmiiiiiinnnnnddddd") + +// Data Processing - FP and SIMD - Cryptographic four register +INST(EOR3, "EOR3", "11001110000mmmmm0aaaaannnnnddddd") +INST(BCAX, "BCAX", "11001110001mmmmm0aaaaannnnnddddd") +INST(SM3SS1, "SM3SS1", "11001110010mmmmm0aaaaannnnnddddd") + +// Data Processing - FP and SIMD - SHA512 two register +INST(SHA512SU0, "SHA512SU0", "1100111011000000100000nnnnnddddd") +INST(SM4E, "SM4E", "1100111011000000100001nnnnnddddd") + +// Data Processing - FP and SIMD - Conversion between floating point and fixed point +INST(SCVTF_float_fix, "SCVTF (scalar, fixed-point)", "z0011110yy000010ppppppnnnnnddddd") +INST(UCVTF_float_fix, "UCVTF (scalar, fixed-point)", "z0011110yy000011ppppppnnnnnddddd") +INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011110yy011000ppppppnnnnnddddd") +INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011110yy011001ppppppnnnnnddddd") + +// Data Processing - FP and SIMD - Conversion between floating point and integer +INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") +INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") +INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") +INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") +INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") +INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") +INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") +INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") +INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") +INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") +INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") +INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") +INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") +//INST(FJCVTZS, "FJCVTZS", "0001111001111110000000nnnnnddddd") + +// Data Processing - FP and SIMD - Floating point data processing +INST(FMOV_float, "FMOV (register)", "00011110yy100000010000nnnnnddddd") +INST(FABS_float, "FABS (scalar)", "00011110yy100000110000nnnnnddddd") +INST(FNEG_float, "FNEG (scalar)", "00011110yy100001010000nnnnnddddd") +INST(FSQRT_float, "FSQRT (scalar)", "00011110yy100001110000nnnnnddddd") +INST(FCVT_float, "FCVT", "00011110yy10001oo10000nnnnnddddd") +INST(FRINTN_float, "FRINTN (scalar)", "00011110yy100100010000nnnnnddddd") +INST(FRINTP_float, "FRINTP (scalar)", "00011110yy100100110000nnnnnddddd") +INST(FRINTM_float, "FRINTM (scalar)", "00011110yy100101010000nnnnnddddd") +INST(FRINTZ_float, "FRINTZ (scalar)", "00011110yy100101110000nnnnnddddd") +INST(FRINTA_float, "FRINTA (scalar)", "00011110yy100110010000nnnnnddddd") +INST(FRINTX_float, "FRINTX (scalar)", "00011110yy100111010000nnnnnddddd") +INST(FRINTI_float, "FRINTI (scalar)", "00011110yy100111110000nnnnnddddd") +//INST(FRINT32X_float, "FRINT32X (scalar)", "00011110yy101000110000nnnnnddddd") // ARMv8.5 +//INST(FRINT64X_float, "FRINT64X (scalar)", "00011110yy101001110000nnnnnddddd") // ARMv8.5 +//INST(FRINT32Z_float, "FRINT32Z (scalar)", "00011110yy101000010000nnnnnddddd") // ARMv8.5 +//INST(FRINT64Z_float, "FRINT64Z (scalar)", "00011110yy101001010000nnnnnddddd") // ARMv8.5 + +// Data Processing - FP and SIMD - Floating point compare +INST(FCMP_float, "FCMP", "00011110yy1mmmmm001000nnnnn0o000") +INST(FCMPE_float, "FCMPE", "00011110yy1mmmmm001000nnnnn1o000") + +// Data Processing - FP and SIMD - Floating point immediate +INST(FMOV_float_imm, "FMOV (scalar, immediate)", "00011110yy1iiiiiiii10000000ddddd") + +// Data Processing - FP and SIMD - Floating point conditional compare +INST(FCCMP_float, "FCCMP", "00011110yy1mmmmmcccc01nnnnn0ffff") +INST(FCCMPE_float, "FCCMPE", "00011110yy1mmmmmcccc01nnnnn1ffff") + +// Data Processing - FP and SIMD - Floating point data processing two register +INST(FMUL_float, "FMUL (scalar)", "00011110yy1mmmmm000010nnnnnddddd") +INST(FDIV_float, "FDIV (scalar)", "00011110yy1mmmmm000110nnnnnddddd") +INST(FADD_float, "FADD (scalar)", "00011110yy1mmmmm001010nnnnnddddd") +INST(FSUB_float, "FSUB (scalar)", "00011110yy1mmmmm001110nnnnnddddd") +INST(FMAX_float, "FMAX (scalar)", "00011110yy1mmmmm010010nnnnnddddd") +INST(FMIN_float, "FMIN (scalar)", "00011110yy1mmmmm010110nnnnnddddd") +INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd") +INST(FMINNM_float, "FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd") +INST(FNMUL_float, "FNMUL (scalar)", "00011110yy1mmmmm100010nnnnnddddd") + +// Data Processing - FP and SIMD - Floating point conditional select +INST(FCSEL_float, "FCSEL", "00011110yy1mmmmmcccc11nnnnnddddd") + +// Data Processing - FP and SIMD - Floating point data processing three register +INST(FMADD_float, "FMADD", "00011111yy0mmmmm0aaaaannnnnddddd") +INST(FMSUB_float, "FMSUB", "00011111yy0mmmmm1aaaaannnnnddddd") +INST(FNMADD_float, "FNMADD", "00011111yy1mmmmm0aaaaannnnnddddd") +INST(FNMSUB_float, "FNMSUB", "00011111yy1mmmmm1aaaaannnnnddddd") + +// BFloat16 +//INST(BFCVT, "BFCVT", "0001111001100011010000nnnnnddddd") // v8.6 +//INST(BFCVTN, "BFCVTN{2}", "0Q00111010100001011010nnnnnddddd") // v8.6 +//INST(BFDOT_element, "BFDOT (by element)", "0Q00111101LMmmmm1111H0nnnnnddddd") // v8.6 +//INST(BFDOT_vec, "BFDOT (vector)", "0Q101110010mmmmm111111nnnnnddddd") // v8.6 +//INST(BFMLALX_element, "BFMLALX (by element)", "0Q00111111LMmmmm1111H0nnnnnddddd") // v8.6 +//INST(BFMLALX_vector, "BFMLALX (vector)", "0Q101110110mmmmm111111nnnnnddddd") // v8.6 +//INST(BFMMLA, "BFMMLA", "01101110010mmmmm111011nnnnnddddd") // v8.6 +#undef INST + }; + // If a matcher has more bits in its mask it is more specific, so it should come first. + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { + // If a matcher has more bits in its mask it is more specific, so it should come first. + return mcl::bit::count_ones(a.second) > mcl::bit::count_ones(b.second); + }); + // Exceptions to the above rule of thumb. + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { + return std::set{ + "MOVI, MVNI, ORR, BIC (vector, immediate)", + "FMOV (vector, immediate)", + "Unallocated SIMD modified immediate", + }.count(e.first) > 0; + }); + for (auto const& e : list) + printf("%s\n", e.inst_final); + } + return 0; +} +