[cmake] refactor: Use CPM over submodules (#143)

Transfers the majority of submodules and large externals to CPM, using source archives rather than full Git clones. Not only does this save massive amounts of clone and configure time, but dependencies are grabbed on-demand rather than being required by default. Additionally, CPM will (generally) automatically search for system dependencies, though certain dependencies have options to control this.

Testing shows gains ranging from 5x to 10x in terms of overall clone/configure time.

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/143
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
crueter 2025-08-04 04:50:14 +02:00
parent 04e5e64538
commit 51b170b470
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
4035 changed files with 709 additions and 1033458 deletions

View file

@ -6,6 +6,13 @@
# Enable modules to include each other's files
# (directory-scoped: adds this directory to the include path of the targets
# defined in this directory and in the subdirectories added below it)
include_directories(.)
# Dynarmic: only added when targeting x86_64 or arm64.
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
    # Set before add_subdirectory() so the subproject can pick the value up.
    # NOTE(review): a plain set() only overrides the subproject's option() when
    # policy CMP0077 is NEW inside that subproject - confirm.
    set(DYNARMIC_IGNORE_ASSERTS ON)
    add_subdirectory(dynarmic)

    # Namespaced alias that the other modules link against.
    add_library(dynarmic::dynarmic ALIAS dynarmic)
endif()
# CMake seems to only define _DEBUG on Windows
# Appended to this directory's COMPILE_DEFINITIONS: Debug configurations get
# _DEBUG, every other configuration gets NDEBUG. Generator expressions are used
# so the choice is made per configuration at generate time.
set_property(DIRECTORY APPEND PROPERTY
COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)

View file

@ -226,7 +226,9 @@ else()
)
endif()
target_link_libraries(audio_core PUBLIC common core Opus::opus)
target_include_directories(audio_core PRIVATE ${OPUS_INCLUDE_DIRS})
target_link_libraries(audio_core PUBLIC common core opus)
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
target_link_libraries(audio_core PRIVATE dynarmic::dynarmic)
endif()
@ -237,7 +239,7 @@ if (ENABLE_CUBEB)
sink/cubeb_sink.h
)
target_link_libraries(audio_core PRIVATE cubeb::cubeb)
target_link_libraries(audio_core PRIVATE cubeb)
target_compile_definitions(audio_core PRIVATE -DHAVE_CUBEB=1)
endif()

View file

@ -1184,6 +1184,7 @@ else()
)
endif()
target_include_directories(core PRIVATE ${OPUS_INCLUDE_DIRS})
target_link_libraries(core PUBLIC common PRIVATE audio_core hid_core network video_core nx_tzdb tz)
target_link_libraries(core PUBLIC Boost::headers PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls RenderDoc::API)
if (MINGW)

214
src/dynarmic/.clang-format Normal file
View file

@ -0,0 +1,214 @@
---
Language: Cpp
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: AlignAfterOperator
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
AttributeMacros:
- __capability
BinPackArguments: true
BinPackParameters: false
BitFieldColonSpacing: Both
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Custom
BreakBeforeConceptDeclarations: true
BreakBeforeTernaryOperators: true
BreakBeforeInheritanceComma: false
BreakConstructorInitializersBeforeComma: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
# EmptyLineAfterAccessModifier: Leave
EmptyLineBeforeAccessModifier: Always
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<mach/'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<windows.h>'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<([^\.])*>$'
Priority: 2
SortPriority: 0
CaseSensitive: false
- Regex: '^<.*\.'
Priority: 3
SortPriority: 0
CaseSensitive: false
- Regex: '.*'
Priority: 4
SortPriority: 0
CaseSensitive: false
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
# IndentAccessModifiers: false
IndentCaseBlocks: false
IndentCaseLabels: false
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentRequires: false
IndentWidth: 4
IndentWrappedFunctionNames: false
# InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
NamespaceMacros:
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
- ParseTestProto
- ParsePartialTestProto
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
# ShortNamespaceLines: 5
SortIncludes: true
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
# SpacesInLineCommentPrefix: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Latest
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 4
TypenameMacros:
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
- NS_SWIFT_NAME
- CF_SWIFT_NAME
- FCODE
- ICODE
...

13
src/dynarmic/.gitignore vendored Normal file
View file

@ -0,0 +1,13 @@
# Built files
build/
build-*/
cmake-build-*/
.idea/
docs/Doxygen/
# Generated files
src/dynarmic/backend/arm64/mig/
src/dynarmic/backend/x64/mig/
# System files
.DS_Store
.vscode
.cache/

209
src/dynarmic/CMakeLists.txt Normal file
View file

@ -0,0 +1,209 @@
cmake_minimum_required(VERSION 3.12)

# A 3.12 policy baseline leaves CMP0077 unset, which makes option() discard a
# normal variable of the same name on the first configure. Embedding projects
# configure dynarmic with plain set() calls before add_subdirectory(), so opt in
# to the NEW behaviour wherever the running CMake knows the policy (3.13+).
if (POLICY CMP0077)
    cmake_policy(SET CMP0077 NEW)
endif()

project(dynarmic LANGUAGES C CXX ASM VERSION 6.7.0)
# MASTER_PROJECT is ON when dynarmic is the top-level project and OFF when it
# was pulled in through add_subdirectory(). CTest is only included in the
# top-level case.
if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
    set(MASTER_PROJECT ON)
    include(CTest)
else()
    set(MASTER_PROJECT OFF)
endif()
#
# Dynarmic project options
#
# Every entry is a boolean cache option except DYNARMIC_FRONTENDS, which is a
# string list and is only given its default when nothing defined it beforehand.
#
option(DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
    "Turning this off causes dynarmic to assume the host CPU doesn't support anything later than SSE3"
    ON)
option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
    "Enables support for systems that require W^X"
    OFF)
option(DYNARMIC_FATAL_ERRORS
    "Errors are fatal"
    OFF)
option(DYNARMIC_IGNORE_ASSERTS
    "Ignore asserts"
    OFF)
option(DYNARMIC_TESTS_USE_UNICORN
    "Enable fuzzing tests against unicorn"
    OFF)
option(DYNARMIC_USE_LLVM
    "Support disassembly of jitted x86_64 code using LLVM"
    OFF)
option(DYNARMIC_USE_PRECOMPILED_HEADERS
    "Use precompiled headers"
    ON)
option(DYNARMIC_USE_BUNDLED_EXTERNALS
    "Use all bundled externals (useful when e.g. cross-compiling)"
    OFF)
# Defaults to ON only when dynarmic is the top-level project.
option(DYNARMIC_WARNINGS_AS_ERRORS
    "Warnings as errors"
    ${MASTER_PROJECT})

if (NOT DEFINED DYNARMIC_FRONTENDS)
    set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable")
endif()
# Default to a Release build.
# CMAKE_BUILD_TYPE only has meaning for single-config generators; multi-config
# generators (Visual Studio, Xcode, Ninja Multi-Config) choose the configuration
# at build time, so the cache entry is left alone for them.
get_property(dynarmic_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if (NOT dynarmic_is_multi_config AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
    message(STATUS "Defaulting to a Release build")
endif()
# Set hard requirements for C++
# C++20 is mandatory (no fallback to an older standard) and compiler-specific
# language extensions are turned off.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
# Disable in-source builds.
# FATAL_ERROR stops processing immediately, so no further configure artifacts
# are written into the source tree once the problem has been detected.
if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
    message(FATAL_ERROR "In-source builds are not allowed.")
endif()
# Make the project's own CMake modules visible to include() and find_package()
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")

# Arch detection: DetectArchitecture is expected to leave ARCHITECTURE defined;
# if it did not, the target is unsupported and configuration is aborted.
include(DetectArchitecture)
if (DEFINED ARCHITECTURE)
    message(STATUS "Target architecture: ${ARCHITECTURE}")
else()
    message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
endif()
# Compiler flags
#
# Builds the DYNARMIC_CXX_FLAGS list: one set for MSVC-style front ends and one
# for GCC/Clang-style front ends. The list is only assembled here.
if (MSVC)
    set(DYNARMIC_CXX_FLAGS
        /experimental:external
        /external:W0
        /external:anglebrackets
        /W4
        /w44263 # Non-virtual member function hides base class virtual function
        /w44265 # Class has virtual functions, but destructor is not virtual
        /w44456 # Declaration of 'var' hides previous local declaration
        /w44457 # Declaration of 'var' hides function parameter
        /w44458 # Declaration of 'var' hides class member
        /w44459 # Declaration of 'var' hides global definition
        /w44946 # Reinterpret-cast between related types
        /wd4592 # Symbol will be dynamically initialized (implementation limitation)
        /permissive- # Stricter C++ standards conformance
        /MP
        /Zi
        /Zo
        /EHsc
        /Zc:externConstexpr # Allows external linkage for variables declared "extern constexpr", as the standard permits.
        /Zc:inline # Omits inline functions from object-file output.
        /Zc:throwingNew # Assumes new (without std::nothrow) never returns null.
        /volatile:iso # Use strict standard-abiding volatile semantics
        /bigobj # Increase number of sections in .obj files
        /DNOMINMAX)

    if (DYNARMIC_WARNINGS_AS_ERRORS)
        list(APPEND DYNARMIC_CXX_FLAGS
            /WX)
    endif()

    # clang-cl: MSVC is true but the compiler ID is Clang.
    # The variable is named without ${} so that if() dereferences it exactly
    # once; the expanded form is re-interpreted as a variable name and becomes a
    # syntax error when the ID is empty.
    if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
        list(APPEND DYNARMIC_CXX_FLAGS
            -Qunused-arguments
            -Wno-missing-braces)
    endif()
else()
    set(DYNARMIC_CXX_FLAGS
        -Wall
        -Wextra
        -Wcast-qual
        -pedantic
        -Wno-missing-braces)

    if (ARCHITECTURE STREQUAL "x86_64")
        list(APPEND DYNARMIC_CXX_FLAGS -mtune=core2)
    endif()

    if (DYNARMIC_WARNINGS_AS_ERRORS)
        list(APPEND DYNARMIC_CXX_FLAGS
            -pedantic-errors
            -Werror)
    endif()

    if (DYNARMIC_FATAL_ERRORS)
        list(APPEND DYNARMIC_CXX_FLAGS
            -Wfatal-errors)
    endif()

    if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
        # GCC produces bogus -Warray-bounds warnings from xbyak headers for code paths that are not
        # actually reachable. Specifically, it happens in cases where some code casts an Operand&
        # to Address& after first checking isMEM(), and that code is inlined in a situation where
        # GCC knows that the variable is actually a Reg64. isMEM() will never return true for a
        # Reg64, but GCC doesn't know that.
        list(APPEND DYNARMIC_CXX_FLAGS -Wno-array-bounds)
        list(APPEND DYNARMIC_CXX_FLAGS -Wstack-usage=4096)
    endif()

    if (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang")
        # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6.
        # And this in turns limits the size of a std::array.
        list(APPEND DYNARMIC_CXX_FLAGS -fbracket-depth=1024)
        # Clang mistakenly blames CMake for using unused arguments during compilation
        list(APPEND DYNARMIC_CXX_FLAGS -Wno-unused-command-line-argument)
    endif()
endif()
# A single non-REQUIRED library can be forced to its bundled copy from the command line with
# e.g. cmake -DCMAKE_DISABLE_FIND_PACKAGE_fmt=ON ...
# DYNARMIC_USE_BUNDLED_EXTERNALS flips that switch for every package listed below.
if (DYNARMIC_USE_BUNDLED_EXTERNALS)
    foreach(dynarmic_bundled_package IN ITEMS
            biscuit
            Catch2
            fmt
            mcl
            oaknut
            unordered_dense
            xbyak
            Zydis
            Zycore)
        set(CMAKE_DISABLE_FIND_PACKAGE_${dynarmic_bundled_package} ON)
    endforeach()
endif()
# Dependency discovery.
# NOTE(review): CPM_USE_LOCAL_PACKAGES presumably tells CPM to try find_package() for
# system copies before fetching sources - confirm against the CPM documentation.
set(CPM_USE_LOCAL_PACKAGES ON)
# Boost is mandatory: configuration stops here if it cannot be found.
find_package(Boost 1.57 REQUIRED)
# fmt is searched in CONFIG mode without REQUIRED, so a miss is not an error at this point.
find_package(fmt 9 CONFIG)
# oaknut: searched when arm64 is among the target architectures or DYNARMIC_TESTS is set.
# NOTE(review): DYNARMIC_TESTS is not declared with option() in the visible part of this
# file, so it is only true if something else defines it - confirm that is intended.
if ("arm64" IN_LIST ARCHITECTURE OR DYNARMIC_TESTS)
find_package(oaknut 2.0.1 CONFIG)
endif()
# xbyak: searched only when x86_64 is among the target architectures.
if ("x86_64" IN_LIST ARCHITECTURE)
find_package(xbyak 7 CONFIG)
endif()
# LLVM is mandatory once DYNARMIC_USE_LLVM is enabled; LLVM_DEFINITIONS is split
# into a CMake list by separate_arguments().
if (DYNARMIC_USE_LLVM)
find_package(LLVM REQUIRED)
separate_arguments(LLVM_DEFINITIONS)
endif()
# Test-only dependencies: Catch2 is optional here, Unicorn is mandatory when
# DYNARMIC_TESTS_USE_UNICORN is enabled.
if (DYNARMIC_TESTS)
find_package(Catch2 3 CONFIG)
if (DYNARMIC_TESTS_USE_UNICORN)
find_package(Unicorn REQUIRED)
endif()
endif()
# Pull in externals CMakeLists for libs where available
add_subdirectory(externals)
# Dynarmic project files
add_subdirectory(src/dynarmic)
# The test suite is only added when DYNARMIC_TESTS is set.
if (DYNARMIC_TESTS)
add_subdirectory(tests)
endif()
#
# Install
#
# Installs the dynarmic target, its exported target file (namespace dynarmic::),
# the generated package config and version files, and the *.h headers found
# under src/dynarmic.
#
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# Every CMake package file is installed into the same directory.
set(dynarmic_cmake_package_dir "${CMAKE_INSTALL_LIBDIR}/cmake/dynarmic")

install(TARGETS dynarmic EXPORT dynarmicTargets)
install(EXPORT dynarmicTargets
    NAMESPACE dynarmic::
    DESTINATION "${dynarmic_cmake_package_dir}"
)

configure_package_config_file(CMakeModules/dynarmicConfig.cmake.in
    dynarmicConfig.cmake
    INSTALL_DESTINATION "${dynarmic_cmake_package_dir}"
)
write_basic_package_version_file(dynarmicConfigVersion.cmake
    COMPATIBILITY SameMajorVersion
)
install(FILES
    "${CMAKE_CURRENT_BINARY_DIR}/dynarmicConfig.cmake"
    "${CMAKE_CURRENT_BINARY_DIR}/dynarmicConfigVersion.cmake"
    DESTINATION "${dynarmic_cmake_package_dir}"
)

install(DIRECTORY src/dynarmic TYPE INCLUDE FILES_MATCHING PATTERN "*.h")

View file

@ -0,0 +1,17 @@
# create_target_directory_groups(<target_name>)
#
# Mirrors the on-disk directory layout of an existing target's SOURCES as IDE
# source groups, so project views in IDEs look like the filesystem.
#
#   target_name - name of a target that already exists
function(create_target_directory_groups target_name)
    # Catch-all group: anything that is not explicitly grouped below lands in
    # "Other Files" and stays out of the way.
    source_group("Other Files" REGULAR_EXPRESSION ".")

    get_target_property(sources_of_target "${target_name}" SOURCES)
    foreach(source_path IN LISTS sources_of_target)
        get_filename_component(parent_dir "${source_path}" DIRECTORY)
        # source_group() separates nested groups with '\' even though paths
        # everywhere else in CMake use '/'.
        string(REPLACE "/" "\\" ide_group "${parent_dir}")
        source_group("${ide_group}" FILES "${source_path}")
    endforeach()
endfunction()

View file

@ -0,0 +1,62 @@
include(CheckSymbolExists)
# When CMAKE_OSX_ARCHITECTURES is set, its value is used as ARCHITECTURE as-is,
# the build is flagged as multi-arch, and the rest of this file (the symbol
# probes) is skipped via return().
if (CMAKE_OSX_ARCHITECTURES)
    set(ARCHITECTURE "${CMAKE_OSX_ARCHITECTURES}")
    set(DYNARMIC_MULTIARCH_BUILD 1)
    return()
endif()
# detect_architecture(<symbol> <arch>)
#
# Sets ARCHITECTURE to <arch> in the caller's scope when the compiler predefines
# the preprocessor symbol <symbol>. Does nothing once ARCHITECTURE is already
# defined, so the first successful probe wins.
function(detect_architecture symbol arch)
    if (DEFINED ARCHITECTURE)
        return()
    endif()

    # Run the probe without console output.
    set(CMAKE_REQUIRED_QUIET YES)
    check_symbol_exists("${symbol}" "" DETECT_ARCHITECTURE_${arch})
    unset(CMAKE_REQUIRED_QUIET)

    if (DETECT_ARCHITECTURE_${arch})
        set(ARCHITECTURE "${arch}" PARENT_SCOPE)
    endif()

    # check_symbol_exists() caches its result; drop the entry so a later probe
    # for the same <arch> with a different symbol is evaluated again.
    unset(DETECT_ARCHITECTURE_${arch} CACHE)
endfunction()
# Probe compiler-predefined macros in order. detect_architecture() is a no-op
# once ARCHITECTURE is defined, so the first macro that exists decides the
# result and the order of the groups below matters.

# 64-bit ARM
detect_architecture("__ARM64__" arm64)
detect_architecture("__aarch64__" arm64)
detect_architecture("_M_ARM64" arm64)
# 32-bit ARM
detect_architecture("__arm__" arm)
detect_architecture("__TARGET_ARCH_ARM" arm)
detect_architecture("_M_ARM" arm)
# x86-64
detect_architecture("__x86_64" x86_64)
detect_architecture("__x86_64__" x86_64)
detect_architecture("__amd64" x86_64)
detect_architecture("_M_X64" x86_64)
# 32-bit x86
detect_architecture("__i386" x86)
detect_architecture("__i386__" x86)
detect_architecture("_M_IX86" x86)
# Itanium
detect_architecture("__ia64" ia64)
detect_architecture("__ia64__" ia64)
detect_architecture("_M_IA64" ia64)
# MIPS
detect_architecture("__mips" mips)
detect_architecture("__mips__" mips)
detect_architecture("_M_MRX000" mips)
# 64-bit PowerPC (checked before the 32-bit PowerPC macros)
detect_architecture("__ppc64__" ppc64)
detect_architecture("__powerpc64__" ppc64)
# PowerPC
detect_architecture("__ppc__" ppc)
detect_architecture("__ppc" ppc)
detect_architecture("__powerpc__" ppc)
detect_architecture("_ARCH_COM" ppc)
detect_architecture("_ARCH_PWR" ppc)
detect_architecture("_ARCH_PPC" ppc)
detect_architecture("_M_MPPC" ppc)
detect_architecture("_M_PPC" ppc)
# RISC-V
detect_architecture("__riscv" riscv)
# WebAssembly (Emscripten)
detect_architecture("__EMSCRIPTEN__" wasm)

View file

@ -0,0 +1,37 @@
# Find module for the Unicorn library (unicorn/unicorn.h + libunicorn).
#
# Hints:
#   UNICORNDIR (environment variable) - optional prefix searched first
#
# Exports:
#
# Variables:
#   Unicorn_FOUND / UNICORN_FOUND - set by find_package_handle_standard_args
#   LIBUNICORN_FOUND              - same value, kept because this header has
#                                   always advertised it
#   LIBUNICORN_INCLUDE_DIR
#   LIBUNICORN_LIBRARY
#
# Target:
#   Unicorn::Unicorn
#

find_path(LIBUNICORN_INCLUDE_DIR
    unicorn/unicorn.h
    HINTS $ENV{UNICORNDIR}
    PATH_SUFFIXES include)

find_library(LIBUNICORN_LIBRARY
    NAMES unicorn
    HINTS $ENV{UNICORNDIR})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Unicorn DEFAULT_MSG LIBUNICORN_LIBRARY LIBUNICORN_INCLUDE_DIR)

# Make the documented LIBUNICORN_FOUND variable actually exist.
set(LIBUNICORN_FOUND "${Unicorn_FOUND}")

# The TARGET guard lets find_package(Unicorn) be called more than once in a
# build tree; add_library() would otherwise fail on the duplicate target name.
if (Unicorn_FOUND AND NOT TARGET Unicorn::Unicorn)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    find_package(Threads REQUIRED)
    unset(THREADS_PREFER_PTHREAD_FLAG)

    add_library(Unicorn::Unicorn UNKNOWN IMPORTED)
    # Paths are quoted so each property always receives exactly one value.
    set_target_properties(Unicorn::Unicorn PROPERTIES
        IMPORTED_LOCATION "${LIBUNICORN_LIBRARY}"
        INTERFACE_INCLUDE_DIRECTORIES "${LIBUNICORN_INCLUDE_DIR}"
        INTERFACE_LINK_LIBRARIES Threads::Threads
    )
endif()

mark_as_advanced(LIBUNICORN_INCLUDE_DIR LIBUNICORN_LIBRARY)

View file

@ -0,0 +1,26 @@
# target_architecture_specific_sources(<project> <arch> <source>...)
#
# Adds sources that must only be compiled for one architecture.
#
#   project - existing target that receives the sources
#   arch    - architecture name; forwarded to the wrapper script
#   ARGN    - source files, absolute or relative to CMAKE_CURRENT_SOURCE_DIR
#
# Outside multi-arch builds the files are added to the target unchanged.
# In a multi-arch build (DYNARMIC_MULTIARCH_BUILD set) every *.cpp file is
# replaced by a generated copy produced at build time by
# impl/TargetArchitectureSpecificSourcesWrapFile.cmake; arguments that are not
# *.cpp files are not added to the target in that mode.
function(target_architecture_specific_sources project arch)
    if (NOT DYNARMIC_MULTIARCH_BUILD)
        target_sources("${project}" PRIVATE ${ARGN})
        return()
    endif()

    foreach(input_file IN LISTS ARGN)
        # The dot is escaped so only a literal ".cpp" suffix matches; an
        # unescaped "." would also accept names such as "foocpp".
        if (input_file MATCHES "\\.cpp$")
            if (NOT IS_ABSOLUTE "${input_file}")
                set(input_file "${CMAKE_CURRENT_SOURCE_DIR}/${input_file}")
            endif()

            # The generated file lives under arch_gen/ in the binary directory,
            # followed by the (absolute) input path.
            set(output_file "${CMAKE_CURRENT_BINARY_DIR}/arch_gen/${input_file}")

            add_custom_command(
                OUTPUT "${output_file}"
                COMMAND ${CMAKE_COMMAND} "-Darch=${arch}"
                        "-Dinput_file=${input_file}"
                        "-Doutput_file=${output_file}"
                        -P "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/impl/TargetArchitectureSpecificSourcesWrapFile.cmake"
                DEPENDS "${input_file}"
                VERBATIM
            )
            target_sources("${project}" PRIVATE "${output_file}")
        endif()
    endforeach()
endfunction()

View file

@ -0,0 +1,33 @@
@PACKAGE_INIT@

include(CMakeFindDependencyMacro)

set(ARCHITECTURE "@ARCHITECTURE@")

# A static dynarmic needs its own dependencies to be found by the consumer.
# The BUILD_SHARED_LIBS placeholder is quoted on purpose: when the variable was
# never defined at configure time it expands to nothing, and an unquoted
# "if (NOT )" evaluates to false, which would skip every find_dependency() call
# for the default static build.
if (NOT "@BUILD_SHARED_LIBS@")
    find_dependency(Boost 1.57)
    find_dependency(fmt 9)
    find_dependency(mcl 0.1.12 EXACT)
    find_dependency(unordered_dense)

    # Architecture-specific dependencies
    if ("arm64" IN_LIST ARCHITECTURE)
        find_dependency(oaknut 2.0.1)
    endif()
    if ("riscv" IN_LIST ARCHITECTURE)
        find_dependency(biscuit 0.9.1)
    endif()
    if ("x86_64" IN_LIST ARCHITECTURE)
        find_dependency(xbyak 7)
        find_dependency(Zydis 4)
    endif()

    # Only needed when dynarmic was built with DYNARMIC_USE_LLVM enabled.
    if ("@DYNARMIC_USE_LLVM@")
        find_dependency(LLVM)
    endif()
endif()

include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")

check_required_components(@PROJECT_NAME@)

View file

@ -0,0 +1,3 @@
# Script-mode helper (cmake -P). Expects -Darch, -Dinput_file and -Doutput_file.
# Writes a copy of input_file to output_file whose contents are wrapped in
# "#if defined(MCL_ARCHITECTURE_<ARCH>)", where <ARCH> is arch in upper case.
string(TOUPPER "${arch}" arch)
file(READ "${input_file}" wrapped_source)
file(WRITE "${output_file}"
    "#include <mcl/macro/architecture.hpp>\n"
    "#if defined(MCL_ARCHITECTURE_${arch})\n"
    "${wrapped_source}\n"
    "#endif\n")

12
src/dynarmic/LICENSE.txt Normal file
View file

@ -0,0 +1,12 @@
Copyright (C) 2017 merryhime <git@mary.rs>
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

420
src/dynarmic/README.md Normal file
View file

@ -0,0 +1,420 @@
Dynarmic
========
[![GitHub Actions Build Status (x86-64)](https://github.com/yuzu-mirror/dynarmic/actions/workflows/x86-64.yml/badge.svg)](https://github.com/yuzu-mirror/dynarmic/actions/workflows/x86-64.yml) [![GitHub Actions Build Status (AArch64)](https://github.com/yuzu-mirror/dynarmic/actions/workflows/aarch64.yml/badge.svg)](https://github.com/yuzu-mirror/dynarmic/actions/workflows/aarch64.yml)
A dynamic recompiler for ARM.
Highlight features:
- Fast dynamic binary translation via Just-in-Time compilation
- Clean API
- Implemented in modern C++20
- Hooks exposed for easy code instrumentation
- Code injection support for very fine-grained instrumentation
- Support for unusual address space setups (bring-your-own memory system)
- Native support for most popular operating systems (Windows, macOS, Linux, FreeBSD, OpenBSD, NetBSD, Android)
*Please note that an adversarial guest program [can determine if it is being run under dynarmic](#disadvantages-of-dynarmic). Preventing this is not a goal of this project.*
### Supported guest architectures
* v3
* v4
* v4T
* v5TE
* v6K
* v6T2
* v7A
* 32-bit v8
* 64-bit v8
You can select the guest version using [ArchVersion](src/dynarmic/interface/A32/arch_version.h).
There are no plans to support v1 or v2.
### Supported host architectures
* x86-64
* AArch64
There are no plans to support any 32-bit architecture.
Important API Changes in v6.x Series
------------------------------------
* **v6.7.0**
* To support use cases where the guest shares the host's address space, `nullptr` is now a valid value for `fastmem_pointer`.
**This change is not backwards-compatible.** If you were previously using `nullptr` to represent an invalid fastmem arena, you will now have to use `std::nullopt`.
Documentation
-------------
Design documentation can be found at [docs/Design.md](docs/Design.md).
Usage Example
-------------
The below is a minimal example. Bring-your-own memory system.
```cpp
#include <array>
#include <cstdint>
#include <cstdio>
#include <exception>
#include "dynarmic/interface/A32/a32.h"
#include "dynarmic/interface/A32/config.h"
using u8 = std::uint8_t;
using u16 = std::uint16_t;
using u32 = std::uint32_t;
using u64 = std::uint64_t;
class MyEnvironment final : public Dynarmic::A32::UserCallbacks {
public:
u64 ticks_left = 0;
std::array<u8, 2048> memory{};
u8 MemoryRead8(u32 vaddr) override {
if (vaddr >= memory.size()) {
return 0;
}
return memory[vaddr];
}
u16 MemoryRead16(u32 vaddr) override {
return u16(MemoryRead8(vaddr)) | u16(MemoryRead8(vaddr + 1)) << 8;
}
u32 MemoryRead32(u32 vaddr) override {
return u32(MemoryRead16(vaddr)) | u32(MemoryRead16(vaddr + 2)) << 16;
}
u64 MemoryRead64(u32 vaddr) override {
return u64(MemoryRead32(vaddr)) | u64(MemoryRead32(vaddr + 4)) << 32;
}
void MemoryWrite8(u32 vaddr, u8 value) override {
if (vaddr >= memory.size()) {
return;
}
memory[vaddr] = value;
}
void MemoryWrite16(u32 vaddr, u16 value) override {
MemoryWrite8(vaddr, u8(value));
MemoryWrite8(vaddr + 1, u8(value >> 8));
}
void MemoryWrite32(u32 vaddr, u32 value) override {
MemoryWrite16(vaddr, u16(value));
MemoryWrite16(vaddr + 2, u16(value >> 16));
}
void MemoryWrite64(u32 vaddr, u64 value) override {
MemoryWrite32(vaddr, u32(value));
MemoryWrite32(vaddr + 4, u32(value >> 32));
}
void InterpreterFallback(u32 pc, size_t num_instructions) override {
// This is never called in practice.
std::terminate();
}
void CallSVC(u32 swi) override {
// Do something.
}
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
// Do something.
}
void AddTicks(u64 ticks) override {
if (ticks > ticks_left) {
ticks_left = 0;
return;
}
ticks_left -= ticks;
}
u64 GetTicksRemaining() override {
return ticks_left;
}
};
int main(int argc, char** argv) {
MyEnvironment env;
Dynarmic::A32::UserConfig user_config;
user_config.callbacks = &env;
Dynarmic::A32::Jit cpu{user_config};
// Execute at least 1 instruction.
// (Note: More than one instruction may be executed.)
env.ticks_left = 1;
// Write some code to memory.
env.MemoryWrite16(0, 0x0088); // lsls r0, r1, #2
env.MemoryWrite16(2, 0xE7FE); // b +#0 (infinite loop)
// Setup registers.
cpu.Regs()[0] = 1;
cpu.Regs()[1] = 2;
cpu.Regs()[15] = 0; // PC = 0
cpu.SetCpsr(0x00000030); // Thumb mode
// Execute!
cpu.Run();
// Here we would expect cpu.Regs()[0] == 8
printf("R0: %u\n", cpu.Regs()[0]);
return 0;
}
```
Alternatives to Dynarmic
------------------------
Here are some projects with the same goals as dynarmic:
* [Unicorn](https://www.unicorn-engine.org/) - Recompiling multi-architecture CPU emulator, based on QEMU
* [SkyEye](http://skyeye.sourceforge.net) - Cached interpreter for ARM
More general alternatives:
* [tARMac](https://davidsharp.com/tarmac/) - Tarmac's use of armlets was the initial inspiration for us to use an intermediate representation
* [QEMU](https://www.qemu.org/) - Recompiling multi-architecture system emulator
* [VisUAL](https://salmanarif.bitbucket.io/visual/index.html) - Visual ARM UAL emulator intended for education
* A wide variety of other recompilers, interpreters and emulators can be found embedded in other projects. Here are some we would recommend looking at:
* [firebird's recompiler](https://github.com/nspire-emus/firebird) - Takes more of a call-threaded approach to recompilation
* [higan's arm7tdmi emulator](https://github.com/higan-emu/higan/tree/master/higan/component/processor/arm7tdmi) - Very clean code-style
* [arm-js by ozaki-r](https://github.com/ozaki-r/arm-js) - Emulates ARMv7A and some peripherals of Versatile Express, in the browser
Disadvantages of Dynarmic
-------------------------
In the pursuit of speed, some behavior not commonly depended upon is elided. Therefore this emulator does not match spec.
Please note that this would mean that a guest application can easily determine if it is being run under instrumentation.
Known examples:
* Only user-mode is emulated, there is no emulation of any other privilege levels.
* FPSR state is approximate.
* Misaligned loads/stores are not appropriately trapped in certain cases.
* Exclusive monitor behavior may not match any known physical processor.
No formal verification has been done, and no security assessment has been made.
Use this code base at your own risk.
Legal
-----
dynarmic is under a 0BSD license. See LICENSE.txt for more details.
dynarmic uses several other libraries, whose licenses are included below:
### biscuit
```
Copyright 2021 Lioncash/Lioncache
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
```
### catch
```
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
```
### fmt
```
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```
### mcl & oaknut
```
MIT License
Copyright (c) 2022 merryhime <https://mary.rs>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
### unordered_dense
```
MIT License
Copyright (c) 2022 Martin Leitner-Ankerl
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
### xbyak
```
Copyright (c) 2007 MITSUNARI Shigeo
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of the copyright owner nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
す場合に限り、再頒布および使用が許可されます。
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
を含めること。
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
権表示、本条件一覧、および下記免責条項を含めること。
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
一切責任を負わないものとします。
```
### zydis
```
The MIT License (MIT)
Copyright (c) 2014-2020 Florian Bernd
Copyright (c) 2014-2020 Joel Höner
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```

324
src/dynarmic/docs/Design.md Normal file
View file

@ -0,0 +1,324 @@
# Dynarmic Design Documentation
Dynarmic is a dynamic recompiler for the ARMv6K architecture. Future plans for dynarmic include
support for other versions of the ARM architecture, having an interpreter mode, and adding support
for other architectures.
Users of this library interact with it primarily through the interface provided in
[`src/dynarmic/interface`](../src/dynarmic/interface). Users specify how dynarmic's CPU core interacts with
the rest of their system by providing an implementation of the relevant `UserCallbacks` interface.
Users setup the CPU state using member functions of `Jit`, then call `Jit::Execute` to start CPU
execution. The callbacks defined on `UserCallbacks` may be called from dynamically generated code,
so users of the library should not depend on the stack being in a walkable state for unwinding.
* A32: [`Jit`](../src/dynarmic/interface/A32/a32.h), [`UserCallbacks`](../src/dynarmic/interface/A32/config.h)
* A64: [`Jit`](../src/dynarmic/interface/A64/a64.h), [`UserCallbacks`](../src/dynarmic/interface/A64/config.h)
Dynarmic reads instructions from memory by calling `UserCallbacks::MemoryReadCode`. These
instructions then pass through several stages:
1. Decoding (Identifying what type of instruction it is and breaking it up into fields)
2. Translation (Generation of high-level IR from the instruction)
3. Optimization (Elimination of redundant microinstructions, other speed improvements)
4. Emission (Generation of host-executable code into memory)
5. Execution (Host CPU jumps to the start of emitted code and runs it)
Using the A32 frontend with the x64 backend as an example:
* Decoding is done by [double dispatch](https://en.wikipedia.org/wiki/Visitor_pattern) in
[`src/dynarmic/frontend/A32/decoder/{arm.h,thumb16.h,thumb32.h}`](../src/dynarmic/frontend/A32/decoder/).
* Translation is done by the visitors in [`src/dynarmic/frontend/A32/translate/translate_{arm,thumb}.cpp`](../src/dynarmic/frontend/A32/translate/).
The function [`Translate`](../src/dynarmic/frontend/A32/translate/translate.h) takes a starting memory location,
some CPU state, and memory reader callback and returns a basic block of IR.
* The IR can be found under [`src/dynarmic/ir/`](../src/dynarmic/ir/).
* Optimizations can be found under [`src/dynarmic/ir/opt/`](../src/dynarmic/ir/opt/).
* Emission is done by `EmitX64` which can be found in [`src/dynarmic/backend/x64/emit_x64.{h,cpp}`](../src/dynarmic/backend/x64/).
* Execution is performed by calling `BlockOfCode::RunCode` in [`src/dynarmic/backend/x64/block_of_code.{h,cpp}`](../src/dynarmic/backend/x64/).
## Decoder
The decoder is a double dispatch decoder. Each instruction is represented by a line in the relevant
instruction table. Here is an example line from [`arm.h`](../src/dynarmic/frontend/A32/decoder/arm.h):
INST(&V::arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv")
(Details on this instruction can be found in section A8.8.1 of the ARMv7-A manual. This is encoding A1.)
The first argument to INST is the member function to call on the visitor. The second argument is a user-readable
instruction name. The third argument is a bit-representation of the instruction.
### Instruction Bit-Representation
Each character in the bitstring represents a bit. A `0` means that that bitposition **must** contain a zero. A `1`
means that that bitposition **must** contain a one. A `-` means we don't care about the value at that bitposition.
A string of the same character represents a field. In the above example, the first four bits `cccc` represent the
four-bit-long cond field of the ARM Add with Carry (immediate) instruction.
The visitor would have to have a function named `arm_ADC_imm` with 6 arguments, one for each field (`cccc`, `S`,
`nnnn`, `dddd`, `rrrr`, `vvvvvvvv`). If there is a mismatch of field number with argument number, a compile-time
error results.
## Translator
The translator is a visitor that uses the decoder to decode instructions. The translator generates IR code with the
help of the [`IREmitter` class](../src/dynarmic/ir/ir_emitter.h). An example of a translation function follows:
bool ArmTranslatorVisitor::arm_ADC_imm(Cond cond, bool S, Reg n, Reg d, int rotate, Imm8 imm8) {
u32 imm32 = ArmExpandImm(rotate, imm8);
// ADC{S}<c> <Rd>, <Rn>, #<imm>
if (ConditionPassed(cond)) {
auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.GetCFlag());
if (d == Reg::PC) {
ASSERT(!S);
ir.ALUWritePC(result.result);
ir.SetTerm(IR::Term::ReturnToDispatch{});
return false;
}
ir.SetRegister(d, result.result);
if (S) {
ir.SetNFlag(ir.MostSignificantBit(result.result));
ir.SetZFlag(ir.IsZero(result.result));
ir.SetCFlag(result.carry);
ir.SetVFlag(result.overflow);
}
}
return true;
}
where `ir` is an instance of the `IREmitter` class. Each member function of the `IREmitter` class constructs
an IR microinstruction.
## Intermediate Representation
Dynarmic uses an ordered SSA intermediate representation. It is very vaguely similar to those found in other
similar projects like redream, nucleus, and xenia. Major differences are: (1) the abundance of context
microinstructions whereas those projects generally only have two (`load_context`/`store_context`), (2) the
explicit handling of flags as their own values, and (3) very different basic block edge handling.
The intention of the context microinstructions and explicit flag handling is to allow for future optimizations. The
differences in the way edges are handled are a quirk of the current implementation and dynarmic will likely add a
function analyser in the medium-term future.
Dynarmic's intermediate representation is typed. Each microinstruction may take zero or more arguments and may
return zero or more arguments. A subset of the microinstructions available is documented below.
A complete list of microinstructions can be found in [src/dynarmic/ir/opcodes.inc](../src/dynarmic/ir/opcodes.inc).
The below lists some commonly used microinstructions.
### Immediate: Imm{U1,U8,U32,RegRef}
<u1> ImmU1(u1 value)
<u8> ImmU8(u8 value)
<u32> ImmU32(u32 value)
<RegRef> ImmRegRef(Arm::Reg gpr)
These instructions take a `bool`, `u8` or `u32` value and wrap it up in an IR node so that it can be used
by the IR.
### Context: {Get,Set}Register
<u32> GetRegister(<RegRef> reg)
<void> SetRegister(<RegRef> reg, <u32> value)
Gets and sets `JitState::Reg[reg]`. Note that `SetRegister(Arm::Reg::R15, _)` is disallowed by `IREmitter`.
Use `{ALU,BX}WritePC` instead.
Note that sequences like `SetRegister(R4, _)` followed by `GetRegister(R4)` are
optimized away.
### Context: {Get,Set}{N,Z,C,V}Flag
<u1> GetNFlag()
<void> SetNFlag(<u1> value)
<u1> GetZFlag()
<void> SetZFlag(<u1> value)
<u1> GetCFlag()
<void> SetCFlag(<u1> value)
<u1> GetVFlag()
<void> SetVFlag(<u1> value)
Gets and sets bits in `JitState::Cpsr`. Similarly to registers redundant get/sets are optimized away.
### Context: BXWritePC
<void> BXWritePC(<u32> value)
This should probably be the last instruction in a translation block unless you're doing something fancy.
This microinstruction sets R15 and CPSR.T as appropriate.
### Callback: CallSupervisor
<void> CallSupervisor(<u32> svc_imm32)
This should probably be the last instruction in a translation block unless you're doing something fancy.
### Calculation: LeastSignificant{Half,Byte}
<u16> LeastSignificantHalf(<u32> value)
<u8> LeastSignificantByte(<u32> value)
Extract a u16 and u8 respectively from a u32.
### Calculation: MostSignificantBit, IsZero
<u1> MostSignificantBit(<u32> value)
<u1> IsZero(<u32> value)
These are used to implement ARM flags N and Z. These can often be optimized away by the backend into a host flag read.
### Calculation: LogicalShiftLeft
(<u32> result, <u1> carry_out) LogicalShiftLeft(<u32> operand, <u8> shift_amount, <u1> carry_in)
Pseudocode:
if shift_amount == 0:
return (operand, carry_in)
x = operand * (2 ** shift_amount)
result = Bits<31,0>(x)
carry_out = Bit<32>(x)
return (result, carry_out)
This follows ARM semantics. Note `shift_amount` is not masked to 5 bits (like `SHL` does on x64).
### Calculation: LogicalShiftRight
(<u32> result, <u1> carry_out) LogicalShiftRight(<u32> operand, <u8> shift_amount, <u1> carry_in)
Pseudocode:
if shift_amount == 0:
return (operand, carry_in)
x = ZeroExtend(operand, from_size: 32, to_size: shift_amount+32)
result = Bits<shift_amount+31,shift_amount>(x)
carry_out = Bit<shift_amount-1>(x)
return (result, carry_out)
This follows ARM semantics. Note `shift_amount` is not masked to 5 bits (like `SHR` does on x64).
### Calculation: ArithmeticShiftRight
(<u32> result, <u1> carry_out) ArithmeticShiftRight(<u32> operand, <u8> shift_amount, <u1> carry_in)
Pseudocode:
if shift_amount == 0:
return (operand, carry_in)
x = SignExtend(operand, from_size: 32, to_size: shift_amount+32)
result = Bits<shift_amount+31,shift_amount>(x)
carry_out = Bit<shift_amount-1>(x)
return (result, carry_out)
This follows ARM semantics. Note `shift_amount` is not masked to 5 bits (like `SAR` does on x64).
### Calculation: RotateRight
(<u32> result, <u1> carry_out) RotateRight(<u32> operand, <u8> shift_amount, <u1> carry_in)
Pseudocode:
if shift_amount == 0:
return (operand, carry_in)
shift_amount %= 32
result = (operand >> shift_amount) | (operand << (32 - shift_amount))
carry_out = Bit<31>(result)
return (result, carry_out)
### Calculation: AddWithCarry
(<u32> result, <u1> carry_out, <u1> overflow) AddWithCarry(<u32> a, <u32> b, <u1> carry_in)
a + b + carry_in
### Calculation: SubWithCarry
(<u32> result, <u1> carry_out, <u1> overflow) SubWithCarry(<u32> a, <u32> b, <u1> carry_in)
This has equivalent semantics to `AddWithCarry(a, Not(b), carry_in)`.
a - b - !carry_in
### Calculation: And
<u32> And(<u32> a, <u32> b)
### Calculation: Eor
<u32> Eor(<u32> a, <u32> b)
Exclusive OR (i.e.: XOR)
### Calculation: Or
<u32> Or(<u32> a, <u32> b)
### Calculation: Not
<u32> Not(<u32> value)
### Callback: {Read,Write}Memory{8,16,32,64}
<u8> ReadMemory8(<u32> vaddr)
<u16> ReadMemory16(<u32> vaddr)
<u32> ReadMemory32(<u32> vaddr)
<u64> ReadMemory64(<u32> vaddr)
<void> WriteMemory8(<u32> vaddr, <u8> value_to_store)
<void> WriteMemory16(<u32> vaddr, <u16> value_to_store)
<void> WriteMemory32(<u32> vaddr, <u32> value_to_store)
<void> WriteMemory64(<u32> vaddr, <u64> value_to_store)
Memory access.
### Terminal: Interpret
SetTerm(IR::Term::Interpret{next})
This terminal instruction calls the interpreter, starting at `next`.
The interpreter must interpret exactly one instruction.
### Terminal: ReturnToDispatch
SetTerm(IR::Term::ReturnToDispatch{})
This terminal instruction returns control to the dispatcher.
The dispatcher will use the value in R15 to determine what comes next.
### Terminal: LinkBlock
SetTerm(IR::Term::LinkBlock{next})
This terminal instruction jumps to the basic block described by `next` if we have enough
cycles remaining. If we do not have enough cycles remaining, we return to the
dispatcher, which will return control to the host.
### Terminal: PopRSBHint
SetTerm(IR::Term::PopRSBHint{})
This terminal instruction checks the top of the Return Stack Buffer against R15.
If RSB lookup fails, control is returned to the dispatcher.
This is an optimization for faster function calls. A backend that doesn't support
this optimization or doesn't have a RSB may choose to implement this exactly as
ReturnToDispatch.
### Terminal: If
SetTerm(IR::Term::If{cond, term_then, term_else})
This terminal instruction conditionally executes one terminal or another depending
on the run-time state of the ARM flags.

2474
src/dynarmic/docs/Doxyfile Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,76 @@
# Register Allocation (x64 Backend)
`HostLoc`s contain values. A `HostLoc` ("host value location") is either a host CPU register or a host spill location.
Values once set cannot be changed. Values can however be moved by the register allocator between `HostLoc`s. This is
handled by the register allocator itself and code that uses the register allocator need not and should not move values
between registers.
The register allocator is based on three concepts: `Use`, `Define` and `Scratch`.
* `Use`: The use of a value.
* `Define`: The definition of a value, this is the only time when a value is set.
* `Scratch`: Allocate a register that can be freely modified as one wishes.
Note that `Use`ing a value decrements its `use_count` by one. When the `use_count` reaches zero the value is discarded and no longer exists.
The member functions on `RegAlloc` are just a combination of the above concepts.
### `Scratch`
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr)
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm)
At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.
### Pure `Use`
Xbyak::Reg64 UseGpr(Argument& arg);
Xbyak::Xmm UseXmm(Argument& arg);
OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it
in an unused XMM register, `UseOpArg` might be in a register or might be a memory location, and `Use` allows
you to specify a specific register (GPR or XMM) to use.
This register **must not** have its value changed.
### `UseScratch`
Xbyak::Reg64 UseScratchGpr(Argument& arg);
Xbyak::Xmm UseScratchXmm(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it
in an unused XMM register, and `UseScratch` allows you to specify a specific register (GPR or XMM) to use.
The return value is the register allocated to you.
You are free to modify the value in the register. The register is discarded at the end of the allocation scope.
### `Define` as register
A `Define` is the definition of a value. This is the only time when a value may be set.
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
value to the specified register `reg`.
### `Define`ing as an alias of a different value
Adding a `Define` to an existing value.
void DefineValue(IR::Inst* inst, Argument& arg);
You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
emitted.
## When to use each?
* Prefer `Use` to `UseScratch` where possible.
* Prefer the `OpArg` variants where possible.
* Prefer to **not** use the specific `HostLoc` variants where possible.

View file

@ -0,0 +1,145 @@
# Return Stack Buffer Optimization (x64 Backend)
One of the optimizations that dynarmic does is block-linking. Block-linking is done when
the destination address of a jump is available at JIT-time. Instead of returning to the
dispatcher at the end of a block we can perform block-linking: just jump directly to the
next block. This is beneficial because returning to the dispatcher can often be quite
expensive.
What should we do in cases when we can't predict the destination address? The eponymous
example is when executing a return statement at the end of a function; the return address
is not statically known at compile time.
We deal with this by using a return stack buffer: When we execute a call instruction,
we push our prediction onto the RSB. When we execute a return instruction, we pop a
prediction off the RSB. If the prediction is a hit, we immediately jump to the relevant
compiled block. Otherwise, we return to the dispatcher.
This is the essential idea behind this optimization.
## `UniqueHash`
One complication dynarmic has is that a compiled block is not uniquely identifiable by
the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
u64 LocationDescriptor::UniqueHash() const {
// This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
}
## Our implementation isn't actually a stack
Dynarmic's RSB isn't actually a stack. It was implemented as a ring buffer because
that showed better performance in tests.
### RSB Structure
The RSB is implemented as a ring buffer. `rsb_ptr` is the index of the insertion
point. Each element in `rsb_location_descriptors` is a `UniqueHash` and they
each correspond to an element in `rsb_codeptrs`. `rsb_codeptrs` contains the
host addresses for the corresponding compiled blocks.
`RSBSize` was chosen by performance testing. Note that this is bigger than the
size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
showed degraded performance.
struct JitState {
// ...
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB();
// ...
};
### RSB Push
We insert our prediction at the insertion point iff the RSB doesn't already
contain a prediction with the same `UniqueHash`.
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
ASSERT(inst->GetArg(0).IsImmediate());
u64 imm64 = inst->GetArg(0).GetU64();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());
code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));
code->mov(loc_desc_reg, u64(imm64));
CodePtr patch_location = code->getCurr<CodePtr>();
patch_unique_hash_locations[imm64].emplace_back(patch_location);
code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
code->EnsurePatchLocationSize(patch_location, 10);
Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}
code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
}
In pseudocode:
for (i := 0 .. RSBSize-1)
if (rsb_location_descriptors[i] == imm64)
goto label;
rsb_ptr++;
rsb_ptr %= RSBSize;
rsb_location_descriptors[rsb_ptr] = imm64; //< The UniqueHash
rsb_codeptrs[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
label:
## RSB Pop
To check if a prediction is in the RSB, we linearly scan the RSB.
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
// This calculation has to match up with IREmitter::PushRSB
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->shl(rcx, 32);
code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);
code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}
code->jmp(rax);
}
In pseudocode:
rbx := ComputeUniqueHash()
rax := ReturnToDispatch
for (i := 0 .. RSBSize-1)
if (rbx == rsb_location_descriptors[i])
rax = rsb_codeptrs[i]
goto rax

116
src/dynarmic/externals/CMakeLists.txt vendored Normal file
View file

@ -0,0 +1,116 @@
include(CPM)
# Let normal variables set below shadow option() declarations in subprojects
# that still request the old CMP0077 behaviour.
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

# Turn off tests in every external that honours the standard BUILD_TESTING switch.
set(BUILD_TESTING OFF)

# Externals are always built as static libraries, even when the surrounding
# build requested shared libraries. In that case also request
# position-independent code and keep this directory's targets out of the
# default "all" target.
if (BUILD_SHARED_LIBS)
    set(BUILD_SHARED_LIBS OFF)
    set(CMAKE_POSITION_INDEPENDENT_CODE ON)
    set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL ON)
endif()
# For libraries that already come with a CMakeLists file,
# simply add the directory to that file as a subdirectory
# to have CMake automatically recognize them.
# biscuit
# Only needed when RISC-V is among the target architectures.
# CPM is the single provider of this package: it downloads the pinned source
# archive (verified against URL_HASH) and adds it to the build. Do not also
# add_subdirectory() a local copy, since that would add the package a second
# time and declare its targets twice.
if ("riscv" IN_LIST ARCHITECTURE)
    CPMAddPackage(
        NAME biscuit
        VERSION 0.9.1
        URL "https://github.com/lioncash/biscuit/archive/76b0be8dae.zip"
        URL_HASH SHA512=47d55ed02d032d6cf3dc107c6c0a9aea686d5f25aefb81d1af91db027b6815bd5add1755505e19d76625feeb17aa2db6cd1668fe0dad2e6a411519bde6ca4489
        CUSTOM_CACHE_KEY "76b0"
    )
endif()
# catch
# TODO(crueter): dedup
# Catch2 (>= 3.0.1) is required only when dynarmic's tests are enabled and no
# Catch2::Catch2WithMain target has been defined yet.
if (DYNARMIC_TESTS AND NOT TARGET Catch2::Catch2WithMain)
    find_package(Catch2 3.0.1 REQUIRED)
endif()
# fmt (formatting library)
# Used as-is when a fmt::fmt target already exists; otherwise the `fmt`
# subdirectory next to this file is added with FMT_INSTALL switched on.
if (NOT TARGET fmt::fmt)
    set(FMT_INSTALL ON)
    add_subdirectory(fmt)
endif()
# mcl
# Pinned source archive obtained through CPM; MCL_INSTALL is passed to the
# package as an option.
CPMAddPackage(
    NAME mcl
    VERSION 0.1.12
    URL "https://github.com/azahar-emu/mcl/archive/7b08d83418.zip"
    URL_HASH SHA512=f943bac39c1879986decad7a442ff4288eaeca4a2907684c7914e115a55ecc43c2782ded85c0835763fe04e40d5c82220ce864423e489e648e408a84f54dc4f3
    OPTIONS
        "MCL_INSTALL ON"
    CUSTOM_CACHE_KEY "7b08"
)
# oaknut
# if (NOT TARGET merry::oaknut)
# if ("arm64" IN_LIST ARCHITECTURE)
# add_subdirectory(oaknut)
# elseif (DYNARMIC_TESTS)
# add_subdirectory(oaknut EXCLUDE_FROM_ALL)
# endif()
# endif()
# unordered_dense
# Pinned source archive obtained through CPM. "CONFIG" is forwarded as the
# find_package() argument, and UNORDERED_DENSE_INSTALL is passed to the
# package as an option.
CPMAddPackage(
    NAME unordered_dense
    URL "https://github.com/Lizzie841/unordered_dense/archive/e59d30b7b1.zip"
    URL_HASH SHA512=71eff7bd9ba4b9226967bacd56a8ff000946f8813167cb5664bb01e96fb79e4e220684d824fe9c59c4d1cc98c606f13aff05b7940a1ed8ab3c95d6974ee34fa0
    FIND_PACKAGE_ARGUMENTS "CONFIG"
    OPTIONS
        "UNORDERED_DENSE_INSTALL ON"
    CUSTOM_CACHE_KEY "e59d"
)
# xbyak
# uncomment if in an independent repo
# if (NOT TARGET xbyak::xbyak)
# if ("x86_64" IN_LIST ARCHITECTURE)
# add_subdirectory(xbyak)
# endif()
# endif()
# zydis (and its dependency zycore)
# Only needed when x86_64 is among the target architectures.
if ("x86_64" IN_LIST ARCHITECTURE)
    # Zycore is added first so that Zycore_SOURCE_DIR can be handed to Zydis below.
    # NOTE(review): Zycore_SOURCE_DIR is presumably only populated when CPM
    # downloads the archive rather than finding an installed package - confirm.
    CPMAddPackage(
        NAME Zycore
        URL "https://github.com/zyantific/zycore-c/archive/75a36c45ae.zip"
        URL_HASH SHA512=15aa399f39713e042c4345bc3175c82f14dca849fde2a21d4f591f62c43e227b70d868d8bb86beb5f4eb68b1d6bd3792cdd638acf89009e787e3d10ee7401924
        CUSTOM_CACHE_KEY "75a3"
    )

    # Zydis is built without its tools, examples and Doxygen docs, and is
    # pointed at the Zycore sources through ZYAN_ZYCORE_PATH.
    CPMAddPackage(
        NAME Zydis
        VERSION 4
        URL "https://github.com/zyantific/zydis/archive/c2d2bab025.zip"
        URL_HASH SHA512=7b48f213ff7aab2926f8c9c65195959143bebbfb2b9a25051ffd8b8b0f1baf1670d9739781de674577d955925f91ac89376e16b476a03828c84e2fd765d45020
        OPTIONS
            "ZYDIS_BUILD_TOOLS OFF"
            "ZYDIS_BUILD_EXAMPLES OFF"
            "ZYDIS_BUILD_DOXYGEN OFF"
            "ZYAN_ZYCORE_PATH ${Zycore_SOURCE_DIR}"
            "CMAKE_DISABLE_FIND_PACKAGE_Doxygen ON"
        CUSTOM_CACHE_KEY "c2d2"
    )
endif()

View file

@ -0,0 +1,447 @@
include(TargetArchitectureSpecificSources)
# The dynarmic library target. Only the sources shared by every frontend and
# host backend are listed here; frontend-, architecture- and platform-specific
# files are attached further down with target_sources() and
# target_architecture_specific_sources().
add_library(dynarmic
    backend/block_range_information.cpp
    backend/block_range_information.h
    backend/exception_handler.h
    common/always_false.h
    common/assert.cpp
    common/assert.h
    common/cast_util.h
    common/common_types.h
    common/crypto/aes.cpp
    common/crypto/aes.h
    common/crypto/crc32.cpp
    common/crypto/crc32.h
    common/crypto/sm4.cpp
    common/crypto/sm4.h
    common/fp/fpcr.h
    common/fp/fpsr.h
    common/fp/fused.cpp
    common/fp/fused.h
    common/fp/info.h
    common/fp/mantissa_util.h
    common/fp/op.h
    common/fp/op/FPCompare.cpp
    common/fp/op/FPCompare.h
    common/fp/op/FPConvert.cpp
    common/fp/op/FPConvert.h
    common/fp/op/FPMulAdd.cpp
    common/fp/op/FPMulAdd.h
    common/fp/op/FPNeg.h
    common/fp/op/FPRecipEstimate.cpp
    common/fp/op/FPRecipEstimate.h
    common/fp/op/FPRecipExponent.cpp
    common/fp/op/FPRecipExponent.h
    common/fp/op/FPRecipStepFused.cpp
    common/fp/op/FPRecipStepFused.h
    common/fp/op/FPRoundInt.cpp
    common/fp/op/FPRoundInt.h
    common/fp/op/FPRSqrtEstimate.cpp
    common/fp/op/FPRSqrtEstimate.h
    common/fp/op/FPRSqrtStepFused.cpp
    common/fp/op/FPRSqrtStepFused.h
    common/fp/op/FPToFixed.cpp
    common/fp/op/FPToFixed.h
    common/fp/process_exception.cpp
    common/fp/process_exception.h
    common/fp/process_nan.cpp
    common/fp/process_nan.h
    common/fp/rounding_mode.h
    common/fp/unpacked.cpp
    common/fp/unpacked.h
    common/fp/util.h
    common/llvm_disassemble.cpp
    common/llvm_disassemble.h
    common/lut_from_list.h
    common/math_util.cpp
    common/math_util.h
    common/memory_pool.cpp
    common/memory_pool.h
    common/safe_ops.h
    common/spin_lock.h
    common/string_util.h
    common/u128.cpp
    common/u128.h
    common/variant_util.h
    frontend/A32/a32_types.cpp
    frontend/A32/a32_types.h
    frontend/A64/a64_types.cpp
    frontend/A64/a64_types.h
    frontend/decoder/decoder_detail.h
    frontend/decoder/matcher.h
    frontend/imm.cpp
    frontend/imm.h
    interface/exclusive_monitor.h
    interface/optimization_flags.h
    ir/acc_type.h
    ir/basic_block.cpp
    ir/basic_block.h
    ir/cond.h
    ir/ir_emitter.cpp
    ir/ir_emitter.h
    ir/location_descriptor.cpp
    ir/location_descriptor.h
    ir/microinstruction.cpp
    ir/microinstruction.h
    ir/opcodes.cpp
    ir/opcodes.h
    ir/opcodes.inc
    ir/opt/constant_propagation_pass.cpp
    ir/opt/dead_code_elimination_pass.cpp
    ir/opt/identity_removal_pass.cpp
    ir/opt/ir_matcher.h
    ir/opt/naming_pass.cpp
    ir/opt/passes.h
    ir/opt/polyfill_pass.cpp
    ir/opt/verification_pass.cpp
    ir/terminal.h
    ir/type.cpp
    ir/type.h
    ir/value.cpp
    ir/value.h
)
# Sources that are only compiled when the A32 frontend is enabled.
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
    target_sources(dynarmic PRIVATE
        frontend/A32/a32_ir_emitter.cpp
        frontend/A32/a32_ir_emitter.h
        frontend/A32/a32_location_descriptor.cpp
        frontend/A32/a32_location_descriptor.h
        frontend/A32/decoder/arm.h
        frontend/A32/decoder/arm.inc
        frontend/A32/decoder/asimd.h
        frontend/A32/decoder/asimd.inc
        frontend/A32/decoder/thumb16.h
        frontend/A32/decoder/thumb16.inc
        frontend/A32/decoder/thumb32.h
        frontend/A32/decoder/thumb32.inc
        frontend/A32/decoder/vfp.h
        frontend/A32/decoder/vfp.inc
        frontend/A32/disassembler/disassembler.h
        frontend/A32/disassembler/disassembler_arm.cpp
        frontend/A32/disassembler/disassembler_thumb.cpp
        frontend/A32/FPSCR.h
        frontend/A32/ITState.h
        frontend/A32/PSR.h
        frontend/A32/translate/a32_translate.cpp
        frontend/A32/translate/a32_translate.h
        frontend/A32/translate/conditional_state.cpp
        frontend/A32/translate/conditional_state.h
        frontend/A32/translate/translate_arm.cpp
        frontend/A32/translate/translate_thumb.cpp
        interface/A32/a32.h
        interface/A32/arch_version.h
        interface/A32/config.h
        interface/A32/coprocessor.h
        interface/A32/coprocessor_util.h
        interface/A32/disassembler.h
        ir/opt/a32_constant_memory_reads_pass.cpp
        ir/opt/a32_get_set_elimination_pass.cpp
    )
endif()
# Sources that are only compiled when the A64 frontend is enabled.
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
    target_sources(dynarmic PRIVATE
        frontend/A64/a64_ir_emitter.cpp
        frontend/A64/a64_ir_emitter.h
        frontend/A64/a64_location_descriptor.cpp
        frontend/A64/a64_location_descriptor.h
        frontend/A64/decoder/a64.h
        frontend/A64/decoder/a64.inc
        frontend/A64/translate/a64_translate.cpp
        frontend/A64/translate/a64_translate.h
        interface/A64/a64.h
        interface/A64/config.h
        ir/opt/a64_callback_config_pass.cpp
        ir/opt/a64_get_set_elimination_pass.cpp
        ir/opt/a64_merge_interpret_blocks.cpp
    )
endif()
# x86_64 host backend: links xbyak and Zydis and adds the x64 emitter sources,
# followed by the per-frontend pieces of that backend.
if ("x86_64" IN_LIST ARCHITECTURE)
    target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1)
    target_link_libraries(dynarmic
        PRIVATE
            xbyak::xbyak
            Zydis
    )

    target_architecture_specific_sources(dynarmic "x86_64"
        backend/x64/abi.cpp
        backend/x64/abi.h
        backend/x64/block_of_code.cpp
        backend/x64/block_of_code.h
        backend/x64/callback.cpp
        backend/x64/callback.h
        backend/x64/constant_pool.cpp
        backend/x64/constant_pool.h
        backend/x64/constants.h
        backend/x64/devirtualize.h
        backend/x64/emit_x64.cpp
        backend/x64/emit_x64.h
        backend/x64/emit_x64_aes.cpp
        backend/x64/emit_x64_crc32.cpp
        backend/x64/emit_x64_data_processing.cpp
        backend/x64/emit_x64_floating_point.cpp
        backend/x64/emit_x64_memory.cpp.inc
        backend/x64/emit_x64_memory.h
        backend/x64/emit_x64_packed.cpp
        backend/x64/emit_x64_saturation.cpp
        backend/x64/emit_x64_sha.cpp
        backend/x64/emit_x64_sm4.cpp
        backend/x64/emit_x64_vector.cpp
        backend/x64/emit_x64_vector_floating_point.cpp
        backend/x64/emit_x64_vector_saturation.cpp
        backend/x64/exclusive_monitor.cpp
        backend/x64/exclusive_monitor_friend.h
        backend/x64/host_feature.h
        backend/x64/hostloc.cpp
        backend/x64/hostloc.h
        backend/x64/jitstate_info.h
        backend/x64/oparg.h
        backend/x64/perf_map.cpp
        backend/x64/perf_map.h
        backend/x64/reg_alloc.cpp
        backend/x64/reg_alloc.h
        backend/x64/stack_layout.h
        backend/x64/verbose_debugging_output.cpp
        backend/x64/verbose_debugging_output.h
        common/spin_lock_x64.cpp
        common/spin_lock_x64.h
        common/x64_disassemble.cpp
        common/x64_disassemble.h
    )

    # A32 guest support on the x64 backend.
    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
        target_architecture_specific_sources(dynarmic "x86_64"
            backend/x64/a32_emit_x64.cpp
            backend/x64/a32_emit_x64.h
            backend/x64/a32_emit_x64_memory.cpp
            backend/x64/a32_interface.cpp
            backend/x64/a32_jitstate.cpp
            backend/x64/a32_jitstate.h
        )
    endif()

    # A64 guest support on the x64 backend.
    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
        target_architecture_specific_sources(dynarmic "x86_64"
            backend/x64/a64_emit_x64.cpp
            backend/x64/a64_emit_x64.h
            backend/x64/a64_emit_x64_memory.cpp
            backend/x64/a64_interface.cpp
            backend/x64/a64_jitstate.cpp
            backend/x64/a64_jitstate.h
        )
    endif()
endif()
# arm64 host backend: links oaknut and adds the arm64 emitter sources, followed
# by the per-frontend pieces of that backend.
if ("arm64" IN_LIST ARCHITECTURE)
    target_link_libraries(dynarmic PRIVATE merry::oaknut)

    target_architecture_specific_sources(dynarmic "arm64"
        backend/arm64/a32_jitstate.cpp
        backend/arm64/a32_jitstate.h
        backend/arm64/a64_jitstate.h
        backend/arm64/abi.cpp
        backend/arm64/abi.h
        backend/arm64/address_space.cpp
        backend/arm64/address_space.h
        backend/arm64/devirtualize.h
        backend/arm64/emit_arm64.cpp
        backend/arm64/emit_arm64.h
        backend/arm64/emit_arm64_a32.cpp
        backend/arm64/emit_arm64_a32_coprocessor.cpp
        backend/arm64/emit_arm64_a32_memory.cpp
        backend/arm64/emit_arm64_a64.cpp
        backend/arm64/emit_arm64_a64_memory.cpp
        backend/arm64/emit_arm64_cryptography.cpp
        backend/arm64/emit_arm64_data_processing.cpp
        backend/arm64/emit_arm64_floating_point.cpp
        backend/arm64/emit_arm64_memory.cpp
        backend/arm64/emit_arm64_memory.h
        backend/arm64/emit_arm64_packed.cpp
        backend/arm64/emit_arm64_saturation.cpp
        backend/arm64/emit_arm64_vector.cpp
        backend/arm64/emit_arm64_vector_floating_point.cpp
        backend/arm64/emit_arm64_vector_saturation.cpp
        backend/arm64/emit_context.h
        backend/arm64/exclusive_monitor.cpp
        backend/arm64/fastmem.h
        backend/arm64/fpsr_manager.cpp
        backend/arm64/fpsr_manager.h
        backend/arm64/reg_alloc.cpp
        backend/arm64/reg_alloc.h
        backend/arm64/stack_layout.h
        backend/arm64/verbose_debugging_output.cpp
        backend/arm64/verbose_debugging_output.h
        common/spin_lock_arm64.cpp
        common/spin_lock_arm64.h
    )

    # A32 guest support on the arm64 backend.
    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
        target_architecture_specific_sources(dynarmic "arm64"
            backend/arm64/a32_address_space.cpp
            backend/arm64/a32_address_space.h
            backend/arm64/a32_core.h
            backend/arm64/a32_interface.cpp
        )
    endif()

    # A64 guest support on the arm64 backend.
    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
        target_architecture_specific_sources(dynarmic "arm64"
            backend/arm64/a64_address_space.cpp
            backend/arm64/a64_address_space.h
            backend/arm64/a64_core.h
            backend/arm64/a64_interface.cpp
        )
    endif()
endif()
# riscv host backend: links biscuit and adds the riscv64 emitter sources. Only
# the A32 frontend is wired up; enabling A64 for this host aborts configuration.
if ("riscv" IN_LIST ARCHITECTURE)
    target_link_libraries(dynarmic PRIVATE biscuit::biscuit)

    target_sources(dynarmic PRIVATE
        backend/riscv64/abi.h
        backend/riscv64/a32_jitstate.cpp
        backend/riscv64/a32_jitstate.h
        backend/riscv64/emit_context.h
        backend/riscv64/emit_riscv64_a32.cpp
        backend/riscv64/emit_riscv64_a32_coprocessor.cpp
        backend/riscv64/emit_riscv64_a32_memory.cpp
        backend/riscv64/emit_riscv64_a64.cpp
        backend/riscv64/emit_riscv64_a64_memory.cpp
        backend/riscv64/emit_riscv64_cryptography.cpp
        backend/riscv64/emit_riscv64_data_processing.cpp
        backend/riscv64/emit_riscv64_floating_point.cpp
        backend/riscv64/emit_riscv64_packed.cpp
        backend/riscv64/emit_riscv64_saturation.cpp
        backend/riscv64/emit_riscv64_vector_floating_point.cpp
        backend/riscv64/emit_riscv64_vector_saturation.cpp
        backend/riscv64/emit_riscv64_vector.cpp
        backend/riscv64/emit_riscv64.cpp
        backend/riscv64/emit_riscv64.h
        backend/riscv64/reg_alloc.cpp
        backend/riscv64/reg_alloc.h
        backend/riscv64/stack_layout.h
    )

    # A32 guest support on the riscv64 backend.
    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
        target_sources(dynarmic PRIVATE
            backend/riscv64/a32_address_space.cpp
            backend/riscv64/a32_address_space.h
            backend/riscv64/a32_core.h
            backend/riscv64/a32_interface.cpp
            backend/riscv64/code_block.h
        )
    endif()

    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
        message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
    endif()
endif()
# Select the exception handler implementation for the host platform.
if (WIN32)
    target_sources(dynarmic PRIVATE backend/exception_handler_windows.cpp)
elseif (APPLE)
    find_path(MACH_EXC_DEFS_DIR "mach/mach_exc.defs")
    if (NOT MACH_EXC_DEFS_DIR)
        # Without the Mach exception definitions the generic handler is used.
        message(WARNING "macOS fastmem disabled: unable to find mach/mach_exc.defs")
        target_sources(dynarmic PRIVATE backend/exception_handler_generic.cpp)
    else()
        message(STATUS "mach/mach_exc.defs location: ${MACH_EXC_DEFS_DIR}")

        # Generate the Mach exception user/server stubs with `mig`, once per
        # backend architecture.
        #
        # The output directory is created with file(MAKE_DIRECTORY) *before*
        # mig runs. Listing `mkdir` and `mig` as two COMMANDs of a single
        # execute_process() would run them concurrently as a pipeline, so mig
        # could start before its output directory exists.
        file(MAKE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig")
        execute_process(
            COMMAND
                mig
                    -arch x86_64
                    -user "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_user.c"
                    -header "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_user.h"
                    -server "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_server.c"
                    -sheader "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_server.h"
                    "${MACH_EXC_DEFS_DIR}/mach/mach_exc.defs"
        )

        file(MAKE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig")
        execute_process(
            COMMAND
                mig
                    -arch arm64
                    -user "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig/mach_exc_user.c"
                    -header "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig/mach_exc_user.h"
                    -server "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig/mach_exc_server.c"
                    -sheader "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig/mach_exc_server.h"
                    "${MACH_EXC_DEFS_DIR}/mach/mach_exc.defs"
        )

        target_sources(dynarmic PRIVATE
            backend/exception_handler_macos.cpp
            backend/exception_handler_macos_mig.c
        )
    endif()
elseif (UNIX)
    if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
        target_link_libraries(dynarmic PRIVATE rt)
    endif()
    target_sources(dynarmic PRIVATE backend/exception_handler_posix.cpp)
else()
    target_sources(dynarmic PRIVATE backend/exception_handler_generic.cpp)
endif()
# IDE source groups mirroring the directory layout.
include(CreateDirectoryGroups)
create_target_directory_groups(dynarmic)

# Consumers include headers relative to the parent of this directory when
# building in-tree, and relative to the install include directory otherwise.
target_include_directories(dynarmic PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)

set_target_properties(dynarmic PROPERTIES
    VERSION ${dynarmic_VERSION}
    SOVERSION ${dynarmic_VERSION_MAJOR}.${dynarmic_VERSION_MINOR}
)

# weird quirk of system installs: the namespaced unordered_dense target is only
# linked when it actually exists.
if (TARGET unordered_dense::unordered_dense)
    target_link_libraries(dynarmic
        PRIVATE
            unordered_dense::unordered_dense
    )
endif()

target_compile_options(dynarmic PRIVATE ${DYNARMIC_CXX_FLAGS})

target_link_libraries(dynarmic
    PRIVATE
        Boost::boost
        fmt::fmt
        merry::mcl
)
# Optional LLVM-based disassembly support.
if (DYNARMIC_USE_LLVM)
    target_include_directories(dynarmic PRIVATE ${LLVM_INCLUDE_DIRS})
    target_compile_definitions(dynarmic PRIVATE DYNARMIC_USE_LLVM=1 ${LLVM_DEFINITIONS})
    llvm_config(dynarmic USE_SHARED armdesc armdisassembler aarch64desc aarch64disassembler x86desc x86disassembler)
endif()

# Feature switches: each definition is only emitted when its option is truthy.
target_compile_definitions(dynarmic PRIVATE
    $<$<BOOL:${DYNARMIC_ENABLE_CPU_FEATURE_DETECTION}>:DYNARMIC_ENABLE_CPU_FEATURE_DETECTION=1>
    $<$<BOOL:${DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT}>:DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT=1>
    $<$<BOOL:${DYNARMIC_IGNORE_ASSERTS}>:MCL_IGNORE_ASSERTS=1>
)

# fmt configuration.
if (CMAKE_SYSTEM_NAME STREQUAL "Windows")
    target_compile_definitions(dynarmic PRIVATE FMT_USE_WINDOWS_H=0)
endif()
target_compile_definitions(dynarmic PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
# Optional precompiled headers (C++ only): the IR emitter header plus the
# assembler header of every enabled host architecture.
if (DYNARMIC_USE_PRECOMPILED_HEADERS)
    set(PRECOMPILED_HEADERS "$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CURRENT_SOURCE_DIR}/ir/ir_emitter.h>")
    if ("x86_64" IN_LIST ARCHITECTURE)
        list(PREPEND PRECOMPILED_HEADERS "$<$<COMPILE_LANGUAGE:CXX>:<xbyak/xbyak.h$<ANGLE-R>>")
    endif()
    if ("arm64" IN_LIST ARCHITECTURE)
        list(PREPEND PRECOMPILED_HEADERS "$<$<COMPILE_LANGUAGE:CXX>:<oaknut/oaknut.hpp$<ANGLE-R>>")
    endif()
    target_precompile_headers(dynarmic PRIVATE ${PRECOMPILED_HEADERS})
    # Set the property on the target itself: CMAKE_PCH_INSTANTIATE_TEMPLATES
    # only initializes this property when a target is created, so assigning the
    # variable here (after add_library) would not reach `dynarmic`.
    set_target_properties(dynarmic PROPERTIES PCH_INSTANTIATE_TEMPLATES ON)
endif()

View file

@ -0,0 +1,424 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/a32_address_space.h"
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/devirtualize.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Backend::Arm64 {
// Emits a stub that jumps to the member function `mfp` of `this_`.
//
// Devirtualize<mfp>(this_) supplies a `this_ptr`/`fn_ptr` pair, which is stored
// as two 64-bit literals right behind the stub. The stub loads `this_ptr` into
// X0 and branches to `fn_ptr` with BR (no link).
//
// Returns the address of the first emitted instruction.
template<auto mfp, typename T>
static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
void* target = code.xptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
// Literal pool for the two loads above, aligned to 8 bytes.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a register-preserving wrapper around the member function `mfp` of
// `this_`, exchanging its argument and result through Xscratch0.
//
// The stub saves ABI_CALLER_SAVE minus Xscratch0, copies Xscratch0 into X1,
// loads `this_ptr` into X0 and calls `fn_ptr` with BLR. Afterwards it copies X0
// into Xscratch0, restores the saved registers and returns with RET.
//
// Returns the address of the first emitted instruction.
template<auto mfp, typename T>
static void* EmitWrappedReadCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
// Xscratch0 is excluded from the save set because it carries the result out.
constexpr u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Xscratch0);
void* target = code.xptr<void*>();
ABI_PushRegisters(code, save_regs, 0);
code.LDR(X0, l_this);
code.MOV(X1, Xscratch0);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.MOV(Xscratch0, X0);
ABI_PopRegisters(code, save_regs, 0);
code.RET();
// Literal pool for the two loads above, aligned to 8 bytes.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a stub for an exclusive read of type T.
//
// The stub loads the address of `conf` into X0 and branches (BR, no link) to
// the captureless lambda `fn`. `fn` calls conf.global_monitor->ReadAndMark<T>
// with conf.processor_id and the address, and that call is handed a reader
// which invokes the user callback `callback` on conf.callbacks.
//
// The address of `conf` is embedded in the emitted code, so the referenced
// object has to stay alive for as long as the stub can run.
//
// Returns the address of the first emitted instruction.
template<auto callback, typename T>
static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A32::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr) -> T {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr);
});
};
void* target = code.xptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
// Literal pool: address of `conf`, then the lambda as a plain function pointer.
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
// Emits a register-preserving wrapper around the member function `mfp` of
// `this_`, taking its two arguments from Xscratch0 and Xscratch1.
//
// The stub saves all of ABI_CALLER_SAVE, copies Xscratch0 into X1 and Xscratch1
// into X2, loads `this_ptr` into X0 and calls `fn_ptr` with BLR. It then
// restores the saved registers and returns with RET; no result is passed back.
//
// Returns the address of the first emitted instruction.
template<auto mfp, typename T>
static void* EmitWrappedWriteCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
constexpr u64 save_regs = ABI_CALLER_SAVE;
void* target = code.xptr<void*>();
ABI_PushRegisters(code, save_regs, 0);
code.LDR(X0, l_this);
code.MOV(X1, Xscratch0);
code.MOV(X2, Xscratch1);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
ABI_PopRegisters(code, save_regs, 0);
code.RET();
// Literal pool for the two loads above, aligned to 8 bytes.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a stub for an exclusive write of type T.
//
// The stub loads the address of `conf` into X0 and branches (BR, no link) to
// the captureless lambda `fn`. `fn` runs
// conf.global_monitor->DoExclusiveOperation<T>, whose operation invokes the
// user callback `callback` with (vaddr, value, expected). It returns 0 when
// that operation reports success and 1 otherwise.
//
// The address of `conf` is embedded in the emitted code, so the referenced
// object has to stay alive for as long as the stub can run.
//
// Returns the address of the first emitted instruction.
template<auto callback, typename T>
static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A32::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
};
void* target = code.xptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
// Literal pool: address of `conf`, then the lambda as a plain function pointer.
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
// Sizes the base address space from conf.code_cache_size, stores a copy of the
// configuration and emits the prelude right away.
A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
        : AddressSpace(conf.code_cache_size), conf(conf) {
    EmitPrelude();
}
// Translates the A32 code at `descriptor` into an IR block and runs the
// optimization pipeline over it. The get/set-elimination and
// constant-propagation stages only run when the matching optimization flag is
// enabled in the configuration; the other passes always run.
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
    IR::Block ir_block = A32::Translate(
        A32::LocationDescriptor{descriptor},
        conf.callbacks,
        {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});

    Optimization::PolyfillPass(ir_block, {});
    Optimization::NamingPass(ir_block);

    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
        Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
        Optimization::DeadCodeElimination(ir_block);
    }

    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
        Optimization::ConstantPropagation(ir_block);
        Optimization::DeadCodeElimination(ir_block);
    }

    Optimization::IdentityRemovalPass(ir_block);
    Optimization::VerificationPass(ir_block);

    return ir_block;
}
// Drops every recorded block range overlapping `ranges` and invalidates the
// basic blocks that block_ranges reports for them.
void A32AddressSpace::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
    InvalidateBasicBlocks(
        block_ranges.InvalidateRanges(ranges));
}
// Emits the fixed prelude at the start of the code cache and records every
// entry point in `prelude_info`:
//   - trampolines for the user callbacks (memory access, SVC, exceptions, ticks),
//   - run_code / step_code, the entry points A32Core calls,
//   - return_to_dispatcher, which looks up the next block and jumps to it,
//   - return_from_run_code, the common exit path.
// Code memory is unprotected for the duration of the emission and protected
// again at the end.
void A32AddressSpace::EmitPrelude() {
using namespace oaknut::util;
UnprotectCodeMemory();
// Plain memory read trampolines.
prelude_info.read_memory_8 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead8>(code, conf.callbacks);
prelude_info.read_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead16>(code, conf.callbacks);
prelude_info.read_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead32>(code, conf.callbacks);
prelude_info.read_memory_64 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead64>(code, conf.callbacks);
// Register-preserving read trampolines (argument and result in Xscratch0).
prelude_info.wrapped_read_memory_8 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead8>(code, conf.callbacks);
prelude_info.wrapped_read_memory_16 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead16>(code, conf.callbacks);
prelude_info.wrapped_read_memory_32 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead32>(code, conf.callbacks);
prelude_info.wrapped_read_memory_64 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead64>(code, conf.callbacks);
// Exclusive read trampolines, routed through conf.global_monitor.
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead8, u8>(code, conf);
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead16, u16>(code, conf);
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead32, u32>(code, conf);
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead64, u64>(code, conf);
// Plain memory write trampolines.
prelude_info.write_memory_8 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
prelude_info.write_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
prelude_info.write_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
prelude_info.write_memory_64 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
// Register-preserving write trampolines (arguments in Xscratch0/Xscratch1).
prelude_info.wrapped_write_memory_8 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
prelude_info.wrapped_write_memory_16 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
prelude_info.wrapped_write_memory_32 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
prelude_info.wrapped_write_memory_64 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
// Exclusive write trampolines, routed through conf.global_monitor.
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
// Remaining user callbacks.
prelude_info.call_svc = EmitCallTrampoline<&A32::UserCallbacks::CallSVC>(code, conf.callbacks);
prelude_info.exception_raised = EmitCallTrampoline<&A32::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
prelude_info.isb_raised = EmitCallTrampoline<&A32::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
prelude_info.add_ticks = EmitCallTrampoline<&A32::UserCallbacks::AddTicks>(code, conf.callbacks);
prelude_info.get_ticks_remaining = EmitCallTrampoline<&A32::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
oaknut::Label return_from_run_code, l_return_to_dispatcher;
// run_code: entry point used for normal execution.
prelude_info.run_code = code.xptr<PreludeInfo::RunCodeFuncType>();
{
// Save ABI_CALLEE_SAVE plus register bit 30 and pass sizeof(StackLayout) along.
// NOTE(review): bit 30 is presumably X30 (the link register) and the size
// argument presumably reserves the StackLayout frame — confirm in abi.h.
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
// Incoming X0 is kept in X19 (branched to at the end of this section);
// X1 and X2 become Xstate and Xhalt.
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.fastmem_pointer) {
code.MOV(Xfastmem, *conf.fastmem_pointer);
}
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
// Point every return stack buffer entry's code_ptr at return_to_dispatcher.
code.LDR(Xscratch0, l_return_to_dispatcher);
for (size_t i = 0; i < RSBCount; i++) {
code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
}
}
if (conf.enable_cycle_counting) {
// Fetch the tick budget from the user callback and remember it on the stack.
code.BL(prelude_info.get_ticks_remaining);
code.MOV(Xticks, X0);
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
}
// Save the host FPCR on the stack, then load FPCR from the upper 16 bits of
// upper_location_descriptor.
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.AND(Wscratch0, Wscratch0, 0xffff0000);
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
// Leave immediately if a halt is already pending, otherwise jump to the block.
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
code.BR(X19);
}
// step_code: same setup as run_code, but limited to a single step.
prelude_info.step_code = code.xptr<PreludeInfo::RunCodeFuncType>();
{
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.fastmem_pointer) {
code.MOV(Xfastmem, *conf.fastmem_pointer);
}
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
code.LDR(Xscratch0, l_return_to_dispatcher);
for (size_t i = 0; i < RSBCount; i++) {
code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
}
}
if (conf.enable_cycle_counting) {
// Stepping uses a fixed budget of one tick instead of asking the callback.
code.MOV(Xticks, 1);
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
}
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.AND(Wscratch0, Wscratch0, 0xffff0000);
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
// Set HaltReason::Step in the halt word with an exclusive load/store loop;
// if any halt reason is already set, exit through return_from_run_code.
oaknut::Label step_hr_loop;
code.l(step_hr_loop);
code.LDAXR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
code.STLXR(Wscratch1, Wscratch0, Xhalt);
code.CBNZ(Wscratch1, step_hr_loop);
code.BR(X19);
}
// return_to_dispatcher: look up (or emit) the next block and jump to it.
prelude_info.return_to_dispatcher = code.xptr<void*>();
{
oaknut::Label l_this, l_addr;
// Exit if a halt was requested or, with cycle counting, no ticks remain.
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
if (conf.enable_cycle_counting) {
code.CMP(Xticks, 0);
code.B(LE, return_from_run_code);
}
// Call fn(this, state); the code pointer it returns in X0 is the jump target.
code.LDR(X0, l_this);
code.MOV(X1, Xstate);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.BR(X0);
const auto fn = [](A32AddressSpace& self, A32JitState& context) -> CodePtr {
return self.GetOrEmit(context.GetLocationDescriptor());
};
// Literal pool: this address space and the lookup function.
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(this));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
}
// return_from_run_code: common exit path for run_code and step_code.
prelude_info.return_from_run_code = code.xptr<void*>();
{
code.l(return_from_run_code);
if (conf.enable_cycle_counting) {
// Report the ticks consumed: cycles_to_run minus what is left in Xticks.
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
code.SUB(X1, X1, Xticks);
code.BL(prelude_info.add_ticks);
}
// Restore the host FPCR saved on entry.
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
// Read the halt word into W0 and clear it to zero with an exclusive
// load/store loop. NOTE(review): W0 is presumably the HaltReason handed
// back to the C++ caller — confirm against RunCodeFuncType.
oaknut::Label exit_hr_loop;
code.l(exit_hr_loop);
code.LDAXR(W0, Xhalt);
code.STLXR(Wscratch0, WZR, Xhalt);
code.CBNZ(Wscratch0, exit_hr_loop);
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.RET();
}
// Literal holding the address of return_to_dispatcher, used for the RSB fill.
code.align(8);
code.l(l_return_to_dispatcher);
code.dx(mcl::bit_cast<u64>(prelude_info.return_to_dispatcher));
// Record where the prelude ends, then invalidate and re-protect code memory.
prelude_info.end_of_prelude = code.offset();
mem.invalidate_all();
ProtectCodeMemory();
}
// Builds the emitter configuration for A32 guests from the user configuration.
// The *_el0 fields are value-initialized, the page table and fastmem address
// spaces are fixed at 32 bits with mirroring enabled, and the emit_* hooks and
// state offsets refer to the A32 emitters and A32JitState.
EmitConfig A32AddressSpace::GetEmitConfig() {
    return EmitConfig{
        // Unsafe optimizations are masked out unless explicitly allowed.
        .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,

        .hook_isb = conf.hook_isb,

        .cntfreq_el0{},
        .ctr_el0{},
        .dczid_el0{},
        .tpidrro_el0{},
        .tpidr_el0{},

        .check_halt_on_memory_access = conf.check_halt_on_memory_access,

        // Page table.
        .page_table_pointer = mcl::bit_cast<u64>(conf.page_table),
        .page_table_address_space_bits = 32,
        .page_table_pointer_mask_bits = conf.page_table_pointer_mask_bits,
        .silently_mirror_page_table = true,
        .absolute_offset_page_table = conf.absolute_offset_page_table,
        .detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
        .only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,

        // Fastmem.
        .fastmem_pointer = conf.fastmem_pointer,
        .recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
        .fastmem_address_space_bits = 32,
        .silently_mirror_fastmem = true,

        // Timing and endianness.
        .wall_clock_cntpct = conf.wall_clock_cntpct,
        .enable_cycle_counting = conf.enable_cycle_counting,

        .always_little_endian = conf.always_little_endian,

        // A32-specific hooks.
        .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; },
        .emit_cond = EmitA32Cond,
        .emit_condition_failed_terminal = EmitA32ConditionFailedTerminal,
        .emit_terminal = EmitA32Terminal,
        .emit_check_memory_abort = EmitA32CheckMemoryAbort,

        // Offsets into A32JitState.
        .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
        .state_fpsr_offset = offsetof(A32JitState, fpsr),
        .state_exclusive_state_offset = offsetof(A32JitState, exclusive_state),

        .coprocessors = conf.coprocessors,

        .very_verbose_debugging_output = conf.very_verbose_debugging_output,
    };
}
// Records the guest PC range covered by a newly emitted block. The range is the
// closed interval from the block's start PC to its end PC minus one, keyed by
// the block's start location.
void A32AddressSpace::RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo&) {
    const A32::LocationDescriptor start_location{block.Location()};
    const A32::LocationDescriptor end_location{block.EndLocation()};

    const auto pc_range = boost::icl::discrete_interval<u32>::closed(
        start_location.PC(),
        end_location.PC() - 1);

    block_ranges.AddRange(pc_range, start_location);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,35 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/arm64/address_space.h"
#include "dynarmic/backend/block_range_information.h"
#include "dynarmic/interface/A32/config.h"
namespace Dynarmic::Backend::Arm64 {
struct EmittedBlockInfo;
// Address space holding emitted code for A32 guests.
class A32AddressSpace final : public AddressSpace {
public:
    explicit A32AddressSpace(const A32::UserConfig& conf);

    // Produces the IR block for the given location.
    IR::Block GenerateIR(IR::LocationDescriptor) const override;

    // Invalidates every emitted block overlapping one of `ranges`.
    void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);

protected:
    friend class A32Core;

    // Emits the fixed prelude; called from the constructor.
    void EmitPrelude();

    EmitConfig GetEmitConfig() override;
    void RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo& block_info) override;

    // Private copy of the user configuration.
    const A32::UserConfig conf;
    // Guest PC ranges covered by the emitted blocks.
    BlockRangeInformation<u32> block_ranges;
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,30 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/arm64/a32_address_space.h"
#include "dynarmic/backend/arm64/a32_jitstate.h"
namespace Dynarmic::Backend::Arm64 {
// Thin driver that enters emitted code through the prelude of an
// A32AddressSpace.
class A32Core final {
public:
    explicit A32Core(const A32::UserConfig&) {}

    // Runs from the location described by `thread_ctx` via prelude run_code.
    HaltReason Run(A32AddressSpace& process, A32JitState& thread_ctx, volatile u32* halt_reason) {
        const auto location = thread_ctx.GetLocationDescriptor();
        const auto entry = process.GetOrEmit(location);
        return process.prelude_info.run_code(entry, &thread_ctx, halt_reason);
    }

    // Same as Run, but with single-stepping set on the location and entered
    // via prelude step_code.
    HaltReason Step(A32AddressSpace& process, A32JitState& thread_ctx, volatile u32* halt_reason) {
        const auto location = A32::LocationDescriptor{thread_ctx.GetLocationDescriptor()}.SetSingleStepping(true);
        const auto entry = process.GetOrEmit(location);
        return process.prelude_info.step_code(entry, &thread_ctx, halt_reason);
    }
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,242 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2021 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <memory>
#include <mutex>
#include <boost/icl/interval_set.hpp>
#include "dynarmic/common/assert.h"
#include <mcl/scope_exit.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/backend/arm64/a32_address_space.h"
#include "dynarmic/backend/arm64/a32_core.h"
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/interface/A32/a32.h"
namespace Dynarmic::A32 {
using namespace Backend::Arm64;
struct Jit::Impl final {
Impl(Jit* jit_interface, A32::UserConfig conf)
: jit_interface(jit_interface)
, conf(conf)
, current_address_space(conf)
, core(conf) {}
HaltReason Run() {
ASSERT(!jit_interface->is_executing);
PerformRequestedCacheInvalidation(static_cast<HaltReason>(Atomic::Load(&halt_reason)));
jit_interface->is_executing = true;
SCOPE_EXIT {
jit_interface->is_executing = false;
};
HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
PerformRequestedCacheInvalidation(hr);
return hr;
}
HaltReason Step() {
ASSERT(!jit_interface->is_executing);
PerformRequestedCacheInvalidation(static_cast<HaltReason>(Atomic::Load(&halt_reason)));
jit_interface->is_executing = true;
SCOPE_EXIT {
jit_interface->is_executing = false;
};
HaltReason hr = core.Step(current_address_space, current_state, &halt_reason);
PerformRequestedCacheInvalidation(hr);
return hr;
}
void ClearCache() {
std::unique_lock lock{invalidation_mutex};
invalidate_entire_cache = true;
HaltExecution(HaltReason::CacheInvalidation);
}
void InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
std::unique_lock lock{invalidation_mutex};
invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
HaltExecution(HaltReason::CacheInvalidation);
}
void Reset() {
current_state = {};
}
void HaltExecution(HaltReason hr) {
Atomic::Or(&halt_reason, static_cast<u32>(hr));
Atomic::Barrier();
}
void ClearHalt(HaltReason hr) {
Atomic::And(&halt_reason, ~static_cast<u32>(hr));
Atomic::Barrier();
}
std::array<std::uint32_t, 16>& Regs() {
return current_state.regs;
}
const std::array<std::uint32_t, 16>& Regs() const {
return current_state.regs;
}
std::array<std::uint32_t, 64>& ExtRegs() {
return current_state.ext_regs;
}
const std::array<std::uint32_t, 64>& ExtRegs() const {
return current_state.ext_regs;
}
std::uint32_t Cpsr() const {
return current_state.Cpsr();
}
void SetCpsr(std::uint32_t value) {
current_state.SetCpsr(value);
}
std::uint32_t Fpscr() const {
return current_state.Fpscr();
}
void SetFpscr(std::uint32_t value) {
current_state.SetFpscr(value);
}
void ClearExclusiveState() {
current_state.exclusive_state = false;
}
void DumpDisassembly() const {
ASSERT_FALSE("Unimplemented");
}
private:
void PerformRequestedCacheInvalidation(HaltReason hr) {
if (Has(hr, HaltReason::CacheInvalidation)) {
std::unique_lock lock{invalidation_mutex};
ClearHalt(HaltReason::CacheInvalidation);
if (invalidate_entire_cache) {
current_address_space.ClearCache();
invalidate_entire_cache = false;
invalid_cache_ranges.clear();
return;
}
if (!invalid_cache_ranges.empty()) {
current_address_space.InvalidateCacheRanges(invalid_cache_ranges);
invalid_cache_ranges.clear();
return;
}
}
}
Jit* jit_interface;
A32::UserConfig conf;
A32JitState current_state{};
A32AddressSpace current_address_space;
A32Core core;
volatile u32 halt_reason = 0;
std::mutex invalidation_mutex;
boost::icl::interval_set<u32> invalid_cache_ranges;
bool invalidate_entire_cache = false;
};
// Public A32::Jit API. Every member below forwards directly to Jit::Impl.

// Builds the implementation object, handing it a back-pointer to this Jit.
Jit::Jit(UserConfig conf)
: impl(std::make_unique<Impl>(this, conf)) {}
// Defined here rather than in the header because it destroys the unique_ptr<Impl>.
Jit::~Jit() = default;
// Runs guest code; returns the reason execution stopped.
HaltReason Jit::Run() {
return impl->Run();
}
// Steps guest code; returns the reason execution stopped.
HaltReason Jit::Step() {
return impl->Step();
}
// Requests a flush of the whole code cache.
void Jit::ClearCache() {
impl->ClearCache();
}
// Requests invalidation of cached code for `length` bytes starting at `start_address`.
void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
impl->InvalidateCacheRange(start_address, length);
}
// Resets guest register state.
void Jit::Reset() {
impl->Reset();
}
// Sets the given halt-reason bits.
void Jit::HaltExecution(HaltReason hr) {
impl->HaltExecution(hr);
}
// Clears the given halt-reason bits.
void Jit::ClearHalt(HaltReason hr) {
impl->ClearHalt(hr);
}
// Mutable view of the 16 core registers.
std::array<std::uint32_t, 16>& Jit::Regs() {
return impl->Regs();
}
// Read-only view of the 16 core registers.
const std::array<std::uint32_t, 16>& Jit::Regs() const {
return impl->Regs();
}
// Mutable view of the 64 extension-register words.
std::array<std::uint32_t, 64>& Jit::ExtRegs() {
return impl->ExtRegs();
}
// Read-only view of the 64 extension-register words.
const std::array<std::uint32_t, 64>& Jit::ExtRegs() const {
return impl->ExtRegs();
}
// Returns the CPSR value assembled from the JIT state.
std::uint32_t Jit::Cpsr() const {
return impl->Cpsr();
}
// Stores a CPSR value into the JIT state.
void Jit::SetCpsr(std::uint32_t value) {
impl->SetCpsr(value);
}
// Returns the FPSCR value assembled from the JIT state.
std::uint32_t Jit::Fpscr() const {
return impl->Fpscr();
}
// Stores an FPSCR value into the JIT state.
void Jit::SetFpscr(std::uint32_t value) {
impl->SetFpscr(value);
}
// Clears the exclusive-access state flag.
void Jit::ClearExclusiveState() {
impl->ClearExclusiveState();
}
// Forwards to Impl::DumpDisassembly, which is unimplemented for this backend.
void Jit::DumpDisassembly() const {
impl->DumpDisassembly();
}
} // namespace Dynarmic::A32

View file

@ -0,0 +1,77 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include <mcl/bit/bit_field.hpp>
#include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::Arm64 {
/// Reassembles the guest CPSR from the separately stored pieces of the JIT state.
u32 A32JitState::Cpsr() const {
    const u32 location_bits = upper_location_descriptor;

    // NZCV, Q and the "other" flags are stored already in their CPSR positions.
    u32 value = cpsr_nzcv | cpsr_q | cpsr_jaifm;

    // GE flags: the top bit of each byte of cpsr_ge maps to CPSR bits 19..16.
    if (mcl::bit::get_bit<31>(cpsr_ge)) {
        value |= u32{1} << 19;
    }
    if (mcl::bit::get_bit<23>(cpsr_ge)) {
        value |= u32{1} << 18;
    }
    if (mcl::bit::get_bit<15>(cpsr_ge)) {
        value |= u32{1} << 17;
    }
    if (mcl::bit::get_bit<7>(cpsr_ge)) {
        value |= u32{1} << 16;
    }

    // E flag (descriptor bit 1 -> CPSR bit 9) and T flag (descriptor bit 0 -> CPSR bit 5).
    if (mcl::bit::get_bit<1>(location_bits)) {
        value |= u32{1} << 9;
    }
    if (mcl::bit::get_bit<0>(location_bits)) {
        value |= u32{1} << 5;
    }

    // IT state: descriptor bits 15..10 stay in place, bits 9..8 move up by 17.
    value |= location_bits & 0b11111100'00000000;
    value |= (location_bits & 0b00000011'00000000) << 17;

    return value;
}
/// Splits a guest CPSR value into the separately stored pieces of the JIT state.
/// The upper 16 bits of upper_location_descriptor are left untouched.
void A32JitState::SetCpsr(u32 cpsr) {
    // NZCV flags
    cpsr_nzcv = cpsr & 0xF0000000;

    // Q flag
    cpsr_q = cpsr & (1 << 27);

    // GE flags: each CPSR bit 19..16 expands to a full byte of ones.
    u32 ge_bytes = 0;
    if (mcl::bit::get_bit<19>(cpsr)) {
        ge_bytes |= 0xFF000000;
    }
    if (mcl::bit::get_bit<18>(cpsr)) {
        ge_bytes |= 0x00FF0000;
    }
    if (mcl::bit::get_bit<17>(cpsr)) {
        ge_bytes |= 0x0000FF00;
    }
    if (mcl::bit::get_bit<16>(cpsr)) {
        ge_bytes |= 0x000000FF;
    }
    cpsr_ge = ge_bytes;

    // Rebuild the low half of the location descriptor from scratch.
    u32 low_half = 0;
    // E flag, T flag
    if (mcl::bit::get_bit<9>(cpsr)) {
        low_half |= 2;
    }
    if (mcl::bit::get_bit<5>(cpsr)) {
        low_half |= 1;
    }
    // IT state
    low_half |= cpsr & 0b11111100'00000000;
    low_half |= (cpsr >> 17) & 0b00000011'00000000;
    upper_location_descriptor = (upper_location_descriptor & 0xFFFF0000) | low_half;

    // Other flags
    cpsr_jaifm = cpsr & 0x010001DF;
}
// FPSCR bits that are kept in the upper half of upper_location_descriptor.
constexpr u32 FPCR_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
// FPSCR bits that are kept in the `fpsr` field.
constexpr u32 FPSR_MASK = 0x0800'009f;

/// Reassembles the guest FPSCR from the descriptor's upper half, fpsr and fpsr_nzcv.
u32 A32JitState::Fpscr() const {
    const u32 mode_bits = upper_location_descriptor & 0xffff'0000;
    return mode_bits | fpsr | fpsr_nzcv;
}

/// Splits a guest FPSCR value across upper_location_descriptor, fpsr and fpsr_nzcv.
/// The lower 16 bits of upper_location_descriptor are preserved.
void A32JitState::SetFpscr(u32 fpscr) {
    const u32 preserved_low_half = upper_location_descriptor & 0x0000'ffff;
    upper_location_descriptor = preserved_low_half | (fpscr & FPCR_MASK);
    fpsr = fpscr & FPSR_MASK;
    fpsr_nzcv = fpscr & 0xf000'0000;
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2021 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::Backend::Arm64 {
/// Guest A32 register state as stored by the arm64 backend.
///
/// CPSR and FPSCR are not stored as single words; Cpsr()/SetCpsr() and
/// Fpscr()/SetFpscr() assemble and split them across the fields below.
struct A32JitState {
    u32 cpsr_nzcv = 0;   // CPSR bits 31..28, kept in place
    u32 cpsr_q = 0;      // CPSR bit 27, kept in place
    u32 cpsr_jaifm = 0;  // remaining CPSR bits (mask 0x010001DF), kept in place
    u32 cpsr_ge = 0;     // one byte per GE flag: 0xFF when set, 0x00 when clear

    u32 fpsr = 0;       // FPSCR bits selected by FPSR_MASK
    u32 fpsr_nzcv = 0;  // FPSCR bits 31..28

    std::array<u32, 16> regs{};

    // Low 16 bits: T (bit 0), E (bit 1) and IT state (bits 15..8).
    // High 16 bits: FPSCR mode bits.
    // Initialised explicitly like every other field: Cpsr(), Fpscr() and
    // GetLocationDescriptor() read it, so a default-initialised
    // `A32JitState s;` must not leave it indeterminate.
    u32 upper_location_descriptor = 0;

    alignas(16) std::array<u32, 64> ext_regs{};

    u32 exclusive_state = 0;

    u32 Cpsr() const;
    void SetCpsr(u32 cpsr);

    u32 Fpscr() const;
    void SetFpscr(u32 fpscr);

    /// Packs regs[15] into the low 32 bits and upper_location_descriptor into
    /// the high 32 bits of an IR location descriptor.
    IR::LocationDescriptor GetLocationDescriptor() const {
        return IR::LocationDescriptor{regs[15] | (static_cast<u64>(upper_location_descriptor) << 32)};
    }
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,600 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/devirtualize.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Backend::Arm64 {
// Emits a stub that calls the member function `mfp` on `this_` and returns the stub's address.
// The stub loads the devirtualized `this` pointer into X0 and branches (BR, no link) to the
// function pointer, so the callee returns straight to whoever called the stub.
// The stub itself only writes X0 and Xscratch0.
template<auto mfp, typename T>
static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
void* target = code.xptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
// 8-byte aligned literals read by the two LDRs above.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a register-preserving stub around the read callback `mfp` and returns its address.
// The stub takes its argument in Xscratch0 and returns the result in Xscratch0.
// Every register in ABI_CALLER_SAVE except Xscratch0 is pushed before the call and popped after.
template<auto mfp, typename T>
static void* EmitWrappedReadCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
// Xscratch0 carries the result out, so it must not be restored by the pop.
constexpr u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Xscratch0);
void* target = code.xptr<void*>();
ABI_PushRegisters(code, save_regs, 0);
code.LDR(X0, l_this);
// Move the argument into X1 before Xscratch0 is reused for the call target.
code.MOV(X1, Xscratch0);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.MOV(Xscratch0, X0);
ABI_PopRegisters(code, save_regs, 0);
code.RET();
// 8-byte aligned literals read by the LDRs above.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a stub for an exclusive read of type T and returns its address.
// The stub loads the address of `conf` into X0 and tail-branches to a helper that calls
// conf.global_monitor->ReadAndMark<T>(), which in turn invokes `callback` on conf.callbacks.
// The address of `conf` is baked into the emitted code, so `conf` must outlive that code.
template<auto callback, typename T>
static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
// Captureless so it can be converted to a plain function pointer below.
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> T {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr);
});
};
void* target = code.xptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
// 8-byte aligned literals read by the two LDRs above.
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
// Emits a register-preserving stub around the write callback `mfp` and returns its address.
// The stub takes its two arguments in Xscratch0 and Xscratch1 and produces no result.
// Every register in ABI_CALLER_SAVE is pushed before the call and popped after.
template<auto mfp, typename T>
static void* EmitWrappedWriteCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
constexpr u64 save_regs = ABI_CALLER_SAVE;
void* target = code.xptr<void*>();
ABI_PushRegisters(code, save_regs, 0);
code.LDR(X0, l_this);
// Move both arguments into place before Xscratch0 is reused for the call target.
code.MOV(X1, Xscratch0);
code.MOV(X2, Xscratch1);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
ABI_PopRegisters(code, save_regs, 0);
code.RET();
// 8-byte aligned literals read by the LDRs above.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a stub for an exclusive write of type T and returns its address.
// The stub loads the address of `conf` into X0 and tail-branches to a helper that runs
// conf.global_monitor->DoExclusiveOperation<T>() around `callback`.
// The helper returns 0 when DoExclusiveOperation reports success and 1 otherwise.
// The address of `conf` is baked into the emitted code, so `conf` must outlive that code.
template<auto callback, typename T>
static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
// Captureless so it can be converted to a plain function pointer below.
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
};
void* target = code.xptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
// 8-byte aligned literals read by the two LDRs above.
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
// Emits a stub that calls UserCallbacks::MemoryRead128 and returns the stub's address.
// The callback's result comes back in X0:X1; the stub moves it into Q0
// (X0 -> low 64 bits, X1 -> high 64 bits) before returning.
static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);
oaknut::Label l_addr, l_this;
void* target = code.xptr<void*>();
// Register-list bits 29 and 30 are saved around the BLR.
// NOTE(review): presumably X29/X30 (frame pointer and link register) - confirm against ABI_PushRegisters.
ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), 0);
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.FMOV(D0, X0);
code.FMOV(V0.D()[1], X1);
ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), 0);
code.RET();
// 8-byte aligned literals read by the LDRs above.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a register-preserving stub around UserCallbacks::MemoryRead128 and returns its address.
// The stub takes its argument in Xscratch0 and returns the 128-bit result in Q0.
// Every register in ABI_CALLER_SAVE except Q0 is pushed before the call and popped after.
static void* EmitWrappedRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);
oaknut::Label l_addr, l_this;
// Q0 carries the result out, so it must not be restored by the pop.
constexpr u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Q0);
void* target = code.xptr<void*>();
ABI_PushRegisters(code, save_regs, 0);
code.LDR(X0, l_this);
code.MOV(X1, Xscratch0);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
// Result arrives in X0:X1; pack it into Q0 (X0 -> low half, X1 -> high half).
code.FMOV(D0, X0);
code.FMOV(V0.D()[1], X1);
ABI_PopRegisters(code, save_regs, 0);
code.RET();
// 8-byte aligned literals read by the LDRs above.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a stub for a 128-bit exclusive read and returns its address.
// The stub loads the address of `conf` into X0, calls a helper that runs
// conf.global_monitor->ReadAndMark<Vector>() around MemoryRead128, then moves the
// X0:X1 result into Q0 before returning.
// The address of `conf` is baked into the emitted code, so `conf` must outlive that code.
static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
// Captureless so it can be converted to a plain function pointer below.
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> Vector {
return conf.global_monitor->ReadAndMark<Vector>(conf.processor_id, vaddr, [&]() -> Vector {
return conf.callbacks->MemoryRead128(vaddr);
});
};
void* target = code.xptr<void*>();
// Register-list bits 29 and 30 are saved around the BLR.
// NOTE(review): presumably X29/X30 (frame pointer and link register) - confirm against ABI_PushRegisters.
ABI_PushRegisters(code, (1ull << 29) | (1ull << 30), 0);
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.FMOV(D0, X0);
code.FMOV(V0.D()[1], X1);
ABI_PopRegisters(code, (1ull << 29) | (1ull << 30), 0);
code.RET();
// 8-byte aligned literals read by the LDRs above.
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
// Emits a stub that calls UserCallbacks::MemoryWrite128 and returns the stub's address.
// The 128-bit value is taken from Q0 and split into X2 (low half) and X3 (high half);
// the stub then branches (BR, no link) to the callback, which returns to the stub's caller.
static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);
oaknut::Label l_addr, l_this;
void* target = code.xptr<void*>();
code.LDR(X0, l_this);
code.FMOV(X2, D0);
code.FMOV(X3, V0.D()[1]);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
// 8-byte aligned literals read by the LDRs above.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a register-preserving stub around UserCallbacks::MemoryWrite128 and returns its address.
// The stub takes its first argument in Xscratch0 and the 128-bit value in Q0.
// Every register in ABI_CALLER_SAVE is pushed before the call and popped after.
static void* EmitWrappedWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);
oaknut::Label l_addr, l_this;
constexpr u64 save_regs = ABI_CALLER_SAVE;
void* target = code.xptr<void*>();
ABI_PushRegisters(code, save_regs, 0);
code.LDR(X0, l_this);
code.MOV(X1, Xscratch0);
// Split Q0 into X2 (low half) and X3 (high half).
code.FMOV(X2, D0);
code.FMOV(X3, V0.D()[1]);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
ABI_PopRegisters(code, save_regs, 0);
code.RET();
// 8-byte aligned literals read by the LDRs above.
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
// Emits a stub for a 128-bit exclusive write and returns its address.
// The stub loads the address of `conf` into X0, splits Q0 into X2/X3 and tail-branches to a
// helper that runs conf.global_monitor->DoExclusiveOperation<Vector>() around
// MemoryWriteExclusive128. The helper returns 0 on success and 1 otherwise.
// The address of `conf` is baked into the emitted code, so `conf` must outlive that code.
static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
// Captureless so it can be converted to a plain function pointer below.
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, Vector value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
[&](Vector expected) -> bool {
return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
})
? 0
: 1;
};
void* target = code.xptr<void*>();
code.LDR(X0, l_this);
code.FMOV(X2, D0);
code.FMOV(X3, V0.D()[1]);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
// 8-byte aligned literals read by the LDRs above.
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
// Sizes the base AddressSpace from conf.code_cache_size, keeps a copy of the configuration,
// and emits the prelude right away.
A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
: AddressSpace(conf.code_cache_size)
, conf(conf) {
EmitPrelude();
}
/// Translates the guest code at `descriptor` into an IR block and runs the
/// configured optimization passes over it, finishing with the verification pass.
IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
    // Guest instruction fetches go through the user's MemoryReadCode callback.
    const auto read_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };

    IR::Block block = A64::Translate(A64::LocationDescriptor{descriptor}, read_code,
                                     {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});

    Optimization::A64CallbackConfigPass(block, conf);
    Optimization::NamingPass(block);

    // Get/set elimination is skipped when halts are checked on memory accesses.
    const bool run_get_set_elimination = conf.HasOptimization(OptimizationFlag::GetSetElimination)
                                      && !conf.check_halt_on_memory_access;
    if (run_get_set_elimination) {
        Optimization::A64GetSetElimination(block);
        Optimization::DeadCodeElimination(block);
    }

    const bool run_const_prop = conf.HasOptimization(OptimizationFlag::ConstProp);
    if (run_const_prop) {
        Optimization::ConstantPropagation(block);
        Optimization::DeadCodeElimination(block);
    }

    const bool run_misc = conf.HasOptimization(OptimizationFlag::MiscIROpt);
    if (run_misc) {
        Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks);
    }

    Optimization::VerificationPass(block);

    return block;
}
// Looks up the blocks recorded in block_ranges that fall within `ranges`
// and passes the result to InvalidateBasicBlocks().
void A64AddressSpace::InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges) {
InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
}
// Emits the prelude: one trampoline per user callback, the run_code and step_code
// entry points, the return-to-dispatcher routine and the common exit path.
// All resulting addresses are recorded in prelude_info.
void A64AddressSpace::EmitPrelude() {
using namespace oaknut::util;
// Code memory is unprotected for the duration of emission and re-protected at the end.
UnprotectCodeMemory();
// Plain, register-preserving ("wrapped") and exclusive-monitor read trampolines.
prelude_info.read_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks);
prelude_info.wrapped_read_memory_8 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
prelude_info.wrapped_read_memory_16 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
prelude_info.wrapped_read_memory_32 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
prelude_info.wrapped_read_memory_64 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
prelude_info.wrapped_read_memory_128 = EmitWrappedRead128CallTrampoline(code, conf.callbacks);
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
prelude_info.exclusive_read_memory_128 = EmitExclusiveRead128CallTrampoline(code, conf);
// The same three flavours for writes.
prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks);
prelude_info.wrapped_write_memory_8 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
prelude_info.wrapped_write_memory_16 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
prelude_info.wrapped_write_memory_32 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
prelude_info.wrapped_write_memory_64 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
prelude_info.wrapped_write_memory_128 = EmitWrappedWrite128CallTrampoline(code, conf.callbacks);
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
prelude_info.exclusive_write_memory_128 = EmitExclusiveWrite128CallTrampoline(code, conf);
// Trampolines for the remaining (non-memory) user callbacks.
prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks);
prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks);
prelude_info.get_cntpct = EmitCallTrampoline<&A64::UserCallbacks::GetCNTPCT>(code, conf.callbacks);
prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
oaknut::Label return_from_run_code, l_return_to_dispatcher;
// run_code(X0 = host code to jump to, X1 = guest state, X2 = pointer to the halt word).
prelude_info.run_code = code.xptr<PreludeInfo::RunCodeFuncType>();
{
// Saves the callee-saved set plus register-list bit 30 and reserves a StackLayout on the stack.
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
// Stash the three arguments in registers that are kept for the whole run.
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.fastmem_pointer) {
code.MOV(Xfastmem, *conf.fastmem_pointer);
}
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
// Point every return-stack-buffer entry's code_ptr at return_to_dispatcher.
code.LDR(Xscratch0, l_return_to_dispatcher);
for (size_t i = 0; i < RSBCount; i++) {
code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
}
}
if (conf.enable_cycle_counting) {
// Ask the user how many ticks may run and remember that budget in cycles_to_run.
code.BL(prelude_info.get_ticks_remaining);
code.MOV(Xticks, X0);
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
}
// Save the host FPCR on the stack, then install the guest FPCR from the JIT state.
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
// If the halt word is already non-zero, leave without running any guest code.
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
code.BR(X19);
}
// step_code: same arguments and setup as run_code, except for the tick budget and halt handling.
prelude_info.step_code = code.xptr<PreludeInfo::RunCodeFuncType>();
{
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.MOV(X19, X0);
code.MOV(Xstate, X1);
code.MOV(Xhalt, X2);
if (conf.page_table) {
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
}
if (conf.fastmem_pointer) {
code.MOV(Xfastmem, *conf.fastmem_pointer);
}
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
code.LDR(Xscratch0, l_return_to_dispatcher);
for (size_t i = 0; i < RSBCount; i++) {
code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
}
}
if (conf.enable_cycle_counting) {
// Stepping uses a fixed budget of one tick instead of querying the user.
code.MOV(Xticks, 1);
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
}
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
// Load-exclusive/store-exclusive loop: exit if a halt is already pending,
// otherwise OR HaltReason::Step into the halt word, retrying until the store succeeds.
oaknut::Label step_hr_loop;
code.l(step_hr_loop);
code.LDAXR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
code.STLXR(Wscratch1, Wscratch0, Xhalt);
code.CBNZ(Wscratch1, step_hr_loop);
code.BR(X19);
}
// return_to_dispatcher: looks up (or emits) the next block and jumps to it.
prelude_info.return_to_dispatcher = code.xptr<void*>();
{
oaknut::Label l_this, l_addr;
// Leave if a halt is pending or, with cycle counting, the tick budget is used up.
code.LDAR(Wscratch0, Xhalt);
code.CBNZ(Wscratch0, return_from_run_code);
if (conf.enable_cycle_counting) {
code.CMP(Xticks, 0);
code.B(LE, return_from_run_code);
}
// Call fn(this, state) below and branch to the code pointer it returns.
code.LDR(X0, l_this);
code.MOV(X1, Xstate);
code.LDR(Xscratch0, l_addr);
code.BLR(Xscratch0);
code.BR(X0);
const auto fn = [](A64AddressSpace& self, A64JitState& context) -> CodePtr {
return self.GetOrEmit(context.GetLocationDescriptor());
};
// The address of this object is baked into the emitted code.
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(this));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
}
// return_from_run_code: common exit path for run_code and step_code.
prelude_info.return_from_run_code = code.xptr<void*>();
{
code.l(return_from_run_code);
if (conf.enable_cycle_counting) {
// Report the ticks consumed: the initial budget minus what is left in Xticks.
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
code.SUB(X1, X1, Xticks);
code.BL(prelude_info.add_ticks);
}
// Restore the host FPCR saved on entry.
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
// Read the halt word into W0 (the return value) and reset it to zero,
// retrying until the exclusive store succeeds.
oaknut::Label exit_hr_loop;
code.l(exit_hr_loop);
code.LDAXR(W0, Xhalt);
code.STLXR(Wscratch0, WZR, Xhalt);
code.CBNZ(Wscratch0, exit_hr_loop);
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
code.RET();
}
// Literal holding the address of return_to_dispatcher, read by the RSB setup above.
code.align(8);
code.l(l_return_to_dispatcher);
code.dx(mcl::bit_cast<u64>(prelude_info.return_to_dispatcher));
// Record where the prelude ends in the code buffer.
prelude_info.end_of_prelude = code.offset();
// NOTE(review): presumably synchronises the instruction cache with the freshly written code - confirm.
mem.invalidate_all();
ProtectCodeMemory();
}
// Builds the EmitConfig for A64 code from the user configuration,
// the A64-specific emit hooks and the field offsets within A64JitState.
EmitConfig A64AddressSpace::GetEmitConfig() {
return EmitConfig{
// Without unsafe_optimizations, the requested set is restricted to all_safe_optimizations.
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
.hook_isb = conf.hook_isb,
.cntfreq_el0 = conf.cntfrq_el0,
.ctr_el0 = conf.ctr_el0,
.dczid_el0 = conf.dczid_el0,
.tpidrro_el0 = conf.tpidrro_el0,
.tpidr_el0 = conf.tpidr_el0,
.check_halt_on_memory_access = conf.check_halt_on_memory_access,
.page_table_pointer = mcl::bit_cast<u64>(conf.page_table),
.page_table_address_space_bits = conf.page_table_address_space_bits,
.page_table_pointer_mask_bits = conf.page_table_pointer_mask_bits,
.silently_mirror_page_table = conf.silently_mirror_page_table,
.absolute_offset_page_table = conf.absolute_offset_page_table,
.detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
.only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,
.fastmem_pointer = conf.fastmem_pointer,
.recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
.fastmem_address_space_bits = conf.fastmem_address_space_bits,
.silently_mirror_fastmem = conf.silently_mirror_fastmem,
.wall_clock_cntpct = conf.wall_clock_cntpct,
.enable_cycle_counting = conf.enable_cycle_counting,
// Hard-coded for A64 rather than taken from the user configuration.
.always_little_endian = true,
.descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
// A64-specific emit hooks.
.emit_cond = EmitA64Cond,
.emit_condition_failed_terminal = EmitA64ConditionFailedTerminal,
.emit_terminal = EmitA64Terminal,
.emit_check_memory_abort = EmitA64CheckMemoryAbort,
// Byte offsets of the fields inside A64JitState.
.state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
.state_fpsr_offset = offsetof(A64JitState, fpsr),
.state_exclusive_state_offset = offsetof(A64JitState, exclusive_state),
// Left empty for A64.
.coprocessors{},
.very_verbose_debugging_output = conf.very_verbose_debugging_output,
};
}
/// Records the guest PC range covered by a freshly emitted block so that
/// InvalidateCacheRanges() can find it later. The emitted-block info is unused.
void A64AddressSpace::RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo&) {
    const A64::LocationDescriptor start{block.Location()};
    const A64::LocationDescriptor end{block.EndLocation()};

    // The end location is one past the block, so the closed interval stops at end - 1.
    const auto covered_pcs = boost::icl::discrete_interval<u64>::closed(start.PC(), end.PC() - 1);

    block_ranges.AddRange(covered_pcs, start);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,35 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/arm64/address_space.h"
#include "dynarmic/backend/block_range_information.h"
#include "dynarmic/interface/A64/config.h"
namespace Dynarmic::Backend::Arm64 {
struct EmittedBlockInfo;
// AddressSpace specialisation for A64 guest code.
class A64AddressSpace final : public AddressSpace {
public:
// Copies `conf` and emits the prelude.
explicit A64AddressSpace(const A64::UserConfig& conf);
// Translates and optimizes the guest code at the given location into an IR block.
IR::Block GenerateIR(IR::LocationDescriptor) const override;
// Invalidates every emitted block recorded as overlapping `ranges`.
void InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges);
protected:
// A64Core reaches into the protected interface of this class.
friend class A64Core;
void EmitPrelude();
EmitConfig GetEmitConfig() override;
void RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo& block_info) override;
// Held by value. The exclusive-access trampolines emitted by EmitPrelude() embed
// the address of this member, so it must stay put for the lifetime of the emitted code.
const A64::UserConfig conf;
// Guest PC ranges of emitted blocks, used for range invalidation.
BlockRangeInformation<u64> block_ranges;
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,30 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
namespace Dynarmic::Backend::Arm64 {
/// Stateless helper that enters emitted code through the address space's prelude.
class A64Core final {
public:
    explicit A64Core(const A64::UserConfig&) {}

    /// Looks up (or emits) the block for the thread's current location and
    /// enters it through the prelude's run_code entry point.
    HaltReason Run(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
        const auto code_ptr = process.GetOrEmit(thread_ctx.GetLocationDescriptor());
        return process.prelude_info.run_code(code_ptr, &thread_ctx, halt_reason);
    }

    /// Like Run(), but requests the single-stepping variant of the block and
    /// enters it through the prelude's step_code entry point.
    HaltReason Step(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
        const A64::LocationDescriptor current{thread_ctx.GetLocationDescriptor()};
        const auto stepping = current.SetSingleStepping(true);
        const auto code_ptr = process.GetOrEmit(stepping);
        return process.prelude_info.step_code(code_ptr, &thread_ctx, halt_reason);
    }
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,326 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <memory>
#include <mutex>
#include <boost/icl/interval_set.hpp>
#include "dynarmic/common/assert.h"
#include <mcl/scope_exit.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_core.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/interface/A64/a64.h"
#include "dynarmic/interface/A64/config.h"
namespace Dynarmic::A64 {
using namespace Backend::Arm64;
struct Jit::Impl final {
Impl(Jit*, A64::UserConfig conf)
: conf(conf)
, current_address_space(conf)
, core(conf) {}
HaltReason Run() {
ASSERT(!is_executing);
PerformRequestedCacheInvalidation(static_cast<HaltReason>(Atomic::Load(&halt_reason)));
is_executing = true;
SCOPE_EXIT {
is_executing = false;
};
HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
PerformRequestedCacheInvalidation(hr);
return hr;
}
HaltReason Step() {
ASSERT(!is_executing);
PerformRequestedCacheInvalidation(static_cast<HaltReason>(Atomic::Load(&halt_reason)));
is_executing = true;
SCOPE_EXIT {
is_executing = false;
};
HaltReason hr = core.Step(current_address_space, current_state, &halt_reason);
PerformRequestedCacheInvalidation(hr);
return hr;
}
void ClearCache() {
std::unique_lock lock{invalidation_mutex};
invalidate_entire_cache = true;
HaltExecution(HaltReason::CacheInvalidation);
}
void InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
std::unique_lock lock{invalidation_mutex};
invalid_cache_ranges.add(boost::icl::discrete_interval<u64>::closed(start_address, start_address + length - 1));
HaltExecution(HaltReason::CacheInvalidation);
}
void Reset() {
current_state = {};
}
void HaltExecution(HaltReason hr) {
Atomic::Or(&halt_reason, static_cast<u32>(hr));
}
void ClearHalt(HaltReason hr) {
Atomic::And(&halt_reason, ~static_cast<u32>(hr));
}
std::uint64_t PC() const {
return current_state.pc;
}
void SetPC(std::uint64_t value) {
current_state.pc = value;
}
std::uint64_t SP() const {
return current_state.sp;
}
void SetSP(std::uint64_t value) {
current_state.sp = value;
}
std::array<std::uint64_t, 31>& Regs() {
return current_state.reg;
}
const std::array<std::uint64_t, 31>& Regs() const {
return current_state.reg;
}
std::array<std::uint64_t, 64>& VecRegs() {
return current_state.vec;
}
const std::array<std::uint64_t, 64>& VecRegs() const {
return current_state.vec;
}
std::uint32_t Fpcr() const {
return current_state.fpcr;
}
void SetFpcr(std::uint32_t value) {
current_state.fpcr = value;
}
std::uint32_t Fpsr() const {
return current_state.fpsr;
}
void SetFpsr(std::uint32_t value) {
current_state.fpsr = value;
}
std::uint32_t Pstate() const {
return current_state.cpsr_nzcv;
}
void SetPstate(std::uint32_t value) {
current_state.cpsr_nzcv = value;
}
void ClearExclusiveState() {
current_state.exclusive_state = false;
}
bool IsExecuting() const {
return is_executing;
}
void DumpDisassembly() const {
current_address_space.DumpDisassembly();
}
std::vector<std::string> Disassemble() const {
ASSERT_FALSE("Unimplemented");
}
private:
void PerformRequestedCacheInvalidation(HaltReason hr) {
if (Has(hr, HaltReason::CacheInvalidation)) {
std::unique_lock lock{invalidation_mutex};
ClearHalt(HaltReason::CacheInvalidation);
if (invalidate_entire_cache) {
current_address_space.ClearCache();
invalidate_entire_cache = false;
invalid_cache_ranges.clear();
return;
}
if (!invalid_cache_ranges.empty()) {
current_address_space.InvalidateCacheRanges(invalid_cache_ranges);
invalid_cache_ranges.clear();
return;
}
}
}
A64::UserConfig conf;
A64JitState current_state{};
A64AddressSpace current_address_space;
A64Core core;
volatile u32 halt_reason = 0;
std::mutex invalidation_mutex;
boost::icl::interval_set<u64> invalid_cache_ranges;
bool invalidate_entire_cache = false;
bool is_executing = false;
};
// Public A64::Jit API: construction and execution control. Everything forwards to Jit::Impl.

// Builds the implementation object from the user configuration.
Jit::Jit(UserConfig conf)
: impl{std::make_unique<Jit::Impl>(this, conf)} {
}
// Defined here rather than in the header because it destroys the unique_ptr<Impl>.
Jit::~Jit() = default;
// Runs guest code; returns the reason execution stopped.
HaltReason Jit::Run() {
return impl->Run();
}
// Steps guest code; returns the reason execution stopped.
HaltReason Jit::Step() {
return impl->Step();
}
// Requests a flush of the whole code cache.
void Jit::ClearCache() {
impl->ClearCache();
}
// Requests invalidation of cached code for `length` bytes starting at `start_address`.
void Jit::InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
impl->InvalidateCacheRange(start_address, length);
}
// Resets guest register state.
void Jit::Reset() {
impl->Reset();
}
// Sets the given halt-reason bits.
void Jit::HaltExecution(HaltReason hr) {
impl->HaltExecution(hr);
}
// Clears the given halt-reason bits.
void Jit::ClearHalt(HaltReason hr) {
impl->ClearHalt(hr);
}
// Returns the guest stack pointer.
std::uint64_t Jit::GetSP() const {
return impl->SP();
}
// Sets the guest stack pointer.
void Jit::SetSP(std::uint64_t value) {
impl->SetSP(value);
}
// Returns the guest program counter.
std::uint64_t Jit::GetPC() const {
return impl->PC();
}
// Sets the guest program counter.
void Jit::SetPC(std::uint64_t value) {
impl->SetPC(value);
}
// Returns general-purpose register `index`.
// The index is not range-checked (std::array::operator[]); valid values are 0..30.
std::uint64_t Jit::GetRegister(std::size_t index) const {
return impl->Regs()[index];
}
// Sets general-purpose register `index`; the index is not range-checked (valid values are 0..30).
void Jit::SetRegister(size_t index, std::uint64_t value) {
impl->Regs()[index] = value;
}
// Returns a copy of all 31 general-purpose registers.
std::array<std::uint64_t, 31> Jit::GetRegisters() const {
return impl->Regs();
}
// Replaces all 31 general-purpose registers.
void Jit::SetRegisters(const std::array<std::uint64_t, 31>& value) {
impl->Regs() = value;
}
// Returns vector register `index`, assembled from the two consecutive 64-bit words
// at positions 2*index and 2*index+1 of the backing storage. The index is not range-checked.
Vector Jit::GetVector(std::size_t index) const {
auto& vec = impl->VecRegs();
return {vec[index * 2], vec[index * 2 + 1]};
}
// Writes the two 64-bit words of `value` to positions 2*index and 2*index+1 of the
// backing storage. The index is not range-checked.
void Jit::SetVector(std::size_t index, Vector value) {
auto& vec = impl->VecRegs();
vec[index * 2] = value[0];
vec[index * 2 + 1] = value[1];
}
// Returns a copy of all 32 vector registers.
//
// The backing storage is a flat array of 64 64-bit words, so the copy is a raw
// memcpy; the static_assert pins the size equality that memcpy depends on.
std::array<Vector, 32> Jit::GetVectors() const {
    static_assert(sizeof(std::array<Vector, 32>) == sizeof(std::array<std::uint64_t, 64>),
                  "Vector array and flat vector register storage must have identical size");
    std::array<Vector, 32> ret;
    std::memcpy(ret.data(), impl->VecRegs().data(), sizeof(ret));
    return ret;
}
// Overwrites all 32 vector registers from `value` with a raw memcpy into the flat
// backing storage; the static_assert guards against writing past its end.
void Jit::SetVectors(const std::array<Vector, 32>& value) {
    static_assert(sizeof(std::array<Vector, 32>) == sizeof(std::array<std::uint64_t, 64>),
                  "Vector array and flat vector register storage must have identical size");
    std::memcpy(impl->VecRegs().data(), value.data(), sizeof(value));
}
// Returns the guest FPCR.
std::uint32_t Jit::GetFpcr() const {
return impl->Fpcr();
}
// Sets the guest FPCR.
void Jit::SetFpcr(std::uint32_t value) {
impl->SetFpcr(value);
}
// Returns the guest FPSR.
std::uint32_t Jit::GetFpsr() const {
return impl->Fpsr();
}
// Sets the guest FPSR.
void Jit::SetFpsr(std::uint32_t value) {
impl->SetFpsr(value);
}
// Returns the guest PSTATE.
std::uint32_t Jit::GetPstate() const {
return impl->Pstate();
}
// Sets the guest PSTATE.
void Jit::SetPstate(std::uint32_t value) {
impl->SetPstate(value);
}
// Forwards to Impl::ClearExclusiveState.
void Jit::ClearExclusiveState() {
impl->ClearExclusiveState();
}
// Reports whether the implementation currently flags itself as executing.
bool Jit::IsExecuting() const {
return impl->IsExecuting();
}
// Forwards to Impl::DumpDisassembly.
void Jit::DumpDisassembly() const {
impl->DumpDisassembly();
}
// Forwards to Impl::Disassemble, which in this backend is an unimplemented stub
// that raises an assertion failure.
std::vector<std::string> Jit::Disassemble() const {
return impl->Disassemble();
}
} // namespace Dynarmic::A64

View file

@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
namespace Dynarmic::Backend::Arm64 {
// Guest CPU state for the A64 frontend as stored by the arm64 backend.
// NOTE(review): emitted code presumably addresses these fields by offset, so the
// field order and alignment should be treated as part of the layout - confirm before reordering.
struct A64JitState {
// General-purpose registers (31 entries).
std::array<u64, 31> reg{};
// Stack pointer.
u64 sp = 0;
// Program counter.
u64 pc = 0;
// Condition flags / PSTATE value.
u32 cpsr_nzcv = 0;
// Vector register storage: 64 words of 64 bits, 16-byte aligned.
alignas(16) std::array<u64, 64> vec{};
u32 exclusive_state = 0;
u32 fpsr = 0;
u32 fpcr = 0;
// Builds the location descriptor for the current state: the PC masked with pc_mask,
// OR-ed with the FPCR bits selected by fpcr_mask and shifted left by fpcr_shift.
IR::LocationDescriptor GetLocationDescriptor() const {
const u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift;
const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask;
return IR::LocationDescriptor{pc_u64 | fpcr_u64};
}
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,94 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/abi.h"
#include <vector>
#include <mcl/bit/bit_field.hpp>
#include "dynarmic/common/common_types.h"
#include <oaknut/oaknut.hpp>
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Per-register stride in bytes used by the DO_IT macro below when computing save-slot offsets.
static constexpr size_t gpr_size = 8;
static constexpr size_t fpr_size = 16;
// Describes the register save area produced for one RegisterList.
struct FrameInfo {
// Indexes of the general-purpose registers to save, in ascending order.
std::vector<int> gprs;
// Indexes of the floating-point/vector registers to save, in ascending order.
std::vector<int> fprs;
// Extra stack space requested by the caller, in bytes.
size_t frame_size;
// Bytes reserved for the GPR save area.
size_t gprs_size;
// Bytes reserved for the FPR save area.
size_t fprs_size;
};
// Expands a 32-bit mask into the ascending list of bit positions that are set.
static std::vector<int> ListToIndexes(u32 list) {
    std::vector<int> set_bits;
    int bit = 0;
    while (bit < 32) {
        if (mcl::bit::get_bit(bit, list)) {
            set_bits.push_back(bit);
        }
        ++bit;
    }
    return set_bits;
}
// Splits a RegisterList into its GPR half (low 32 bits) and FPR half (high 32 bits)
// and computes how many bytes each save area needs.
//
// GPRs take 8 bytes each, rounded up to a whole 16-byte pair; FPRs take 16 bytes each.
static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) {
    FrameInfo info;
    info.gprs = ListToIndexes(static_cast<u32>(rl));
    info.fprs = ListToIndexes(static_cast<u32>(rl >> 32));
    info.frame_size = frame_size;
    info.gprs_size = (info.gprs.size() + 1) / 2 * 16;
    info.fprs_size = info.fprs.size() * 16;
    return info;
}
// Emits the loads or stores for one register class (TYPE is `gpr` or `fpr`).
//
// Expects `code` and `frame_info` to be in scope at the expansion site. Registers are
// processed two at a time with PAIR_OP; if the count is odd, the last register is
// handled with SINGLE_OP. Slot i lives at SP + OFFSET + i * TYPE##_size.
// The outer size check keeps `size() - 1` from wrapping around when the list is empty.
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET) \
if (frame_info.TYPE##s.size() > 0) { \
for (size_t i = 0; i < frame_info.TYPE##s.size() - 1; i += 2) { \
code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, oaknut::REG_TYPE{frame_info.TYPE##s[i + 1]}, SP, (OFFSET) + i * TYPE##_size); \
} \
if (frame_info.TYPE##s.size() % 2 == 1) { \
const size_t i = frame_info.TYPE##s.size() - 1; \
code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[i]}, SP, (OFFSET) + i * TYPE##_size); \
} \
}
// Emits code that saves the registers in `rl` on the stack and then reserves
// `frame_size` further bytes below them.
//
// Resulting layout from the final SP upwards: [frame_size bytes][GPR save area][FPR save area].
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
// The local must be named frame_info: the DO_IT macro refers to it by name.
const FrameInfo frame_info = CalculateFrameInfo(rl, frame_size);
code.SUB(SP, SP, frame_info.gprs_size + frame_info.fprs_size);
DO_IT(gpr, XReg, STP, STR, 0)
DO_IT(fpr, QReg, STP, STR, frame_info.gprs_size)
code.SUB(SP, SP, frame_info.frame_size);
}
// Emits the inverse of ABI_PushRegisters: releases the `frame_size` bytes, reloads the
// registers in `rl` from their save slots, then releases the save areas.
// `rl` and `frame_size` must match the values given to the corresponding push.
void ABI_PopRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
// The local must be named frame_info: the DO_IT macro refers to it by name.
const FrameInfo frame_info = CalculateFrameInfo(rl, frame_size);
code.ADD(SP, SP, frame_info.frame_size);
DO_IT(gpr, XReg, LDP, LDR, 0)
DO_IT(fpr, QReg, LDP, LDR, frame_info.gprs_size)
code.ADD(SP, SP, frame_info.gprs_size + frame_info.fprs_size);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,80 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <initializer_list>
#include <stdexcept>
#include <type_traits>
#include <mcl/mp/metavalue/lift_value.hpp>
#include "dynarmic/common/common_types.h"
#include <oaknut/oaknut.hpp>
#include "dynarmic/common/always_false.h"
namespace Dynarmic::Backend::Arm64 {
// Host registers with a fixed role in emitted code (x24-x28).
// NOTE(review): the roles are inferred from the names; the code that loads these
// registers is not in this header - confirm against the prelude.
constexpr oaknut::XReg Xstate{28};
constexpr oaknut::XReg Xhalt{27};
constexpr oaknut::XReg Xticks{26};
constexpr oaknut::XReg Xfastmem{25};
constexpr oaknut::XReg Xpagetable{24};
// Scratch registers: x16, x17 and x30, with W-sized views of the same register numbers.
constexpr oaknut::XReg Xscratch0{16}, Xscratch1{17}, Xscratch2{30};
constexpr oaknut::WReg Wscratch0{16}, Wscratch1{17}, Wscratch2{30};
// Returns scratch register 0 at the requested width: Wscratch0 for 32, Xscratch0 for 64.
// Any other bitsize is rejected at compile time.
template<size_t bitsize>
constexpr auto Rscratch0() {
if constexpr (bitsize == 32) {
return Wscratch0;
} else if constexpr (bitsize == 64) {
return Xscratch0;
} else {
// The condition depends on bitsize so that it only fires when this branch is instantiated.
static_assert(Common::always_false_v<mcl::mp::lift_value<bitsize>>);
}
}
// Returns scratch register 1 at the requested width: Wscratch1 for 32, Xscratch1 for 64.
// Any other bitsize is rejected at compile time.
template<size_t bitsize>
constexpr auto Rscratch1() {
if constexpr (bitsize == 32) {
return Wscratch1;
} else if constexpr (bitsize == 64) {
return Xscratch1;
} else {
// The condition depends on bitsize so that it only fires when this branch is instantiated.
static_assert(Common::always_false_v<mcl::mp::lift_value<bitsize>>);
}
}
// Register index lists handed to the register allocator (see EmitArm64).
// GPR_ORDER lists x19-x23 first, then x9-x15, then x0-x8; it omits the fixed-role
// and scratch registers declared above. FPR_ORDER lists v8-v31.
constexpr std::initializer_list<int> GPR_ORDER{19, 20, 21, 22, 23, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8};
constexpr std::initializer_list<int> FPR_ORDER{8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
// Bitmask of host registers: bit n (0..31) stands for GPR n, bit 32+n for vector register n.
using RegisterList = u64;
// Converts a single register into its RegisterList bit.
//
// Vector registers map to bit (index + 32). General-purpose registers map to bit
// `index`; an index of -1 (used for SP) maps to bit 31. Index 31 (ZR) is rejected
// with an assertion failure.
constexpr RegisterList ToRegList(oaknut::Reg reg) {
    const auto idx = reg.index();
    if (reg.is_vector()) {
        return RegisterList{1} << (idx + 32);
    }
    if (idx == 31) {
        ASSERT_FALSE("ZR not allowed in reg list");
    }
    return RegisterList{1} << (idx == -1 ? 31 : idx);
}
// Registers treated as callee-saved: GPR bits 19-30 (x19-x30) and vector bits 8-15 (v8-v15).
constexpr RegisterList ABI_CALLEE_SAVE = 0x0000ff00'7ff80000;
// Registers treated as caller-saved: GPR bits 0-15 and 30 (x0-x15, x30) and all 32 vector registers.
constexpr RegisterList ABI_CALLER_SAVE = 0xffffffff'4000ffff;
// Emit code that saves / restores the registers in `rl` and reserves / releases
// `stack_space` extra bytes of stack (the definitions name this parameter frame_size).
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t stack_space);
void ABI_PopRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t stack_space);
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,353 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <cstdio>
#include <mcl/bit_cast.hpp>
#include "dynarmic/backend/arm64/a64_address_space.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/devirtualize.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/llvm_disassemble.h"
#include "dynarmic/interface/exclusive_monitor.h"
namespace Dynarmic::Backend::Arm64 {
// Allocates a code cache of `code_cache_size` bytes, points the code generator at its
// start, and installs the exception handler over that memory with FastmemCallback as
// the fastmem callback.
// Caches larger than 128 MiB are rejected by assertion.
AddressSpace::AddressSpace(size_t code_cache_size)
: code_cache_size(code_cache_size)
, mem(code_cache_size)
, code(mem.ptr(), mem.ptr())
, fastmem_manager(exception_handler) {
ASSERT_MSG(code_cache_size <= 128 * 1024 * 1024, "code_cache_size > 128 MiB not currently supported");
exception_handler.Register(mem, code_cache_size);
exception_handler.SetFastmemCallback([this](u64 host_pc) {
return FastmemCallback(host_pc);
});
}
AddressSpace::~AddressSpace() = default;
// Looks up the entry point currently registered for `descriptor`.
// Returns nullptr when no block has been emitted for it (or it has been invalidated).
CodePtr AddressSpace::Get(IR::LocationDescriptor descriptor) {
    const auto entry = block_entries.find(descriptor);
    return entry != block_entries.end() ? entry->second : nullptr;
}
// Maps a host code address back to the location descriptor of the block whose entry
// point is the greatest one not above `host_pc`.
// Returns std::nullopt when every recorded entry point lies above `host_pc`.
std::optional<IR::LocationDescriptor> AddressSpace::ReverseGetLocation(CodePtr host_pc) {
    auto candidate = reverse_block_entries.upper_bound(host_pc);
    if (candidate == reverse_block_entries.begin()) {
        return std::nullopt;
    }
    // upper_bound yields the first entry strictly above host_pc; step back to the one containing it.
    --candidate;
    return candidate->second;
}
// Maps a host code address back to the greatest recorded block entry point that is
// not above `host_pc`.
// Returns nullptr when every recorded entry point lies above `host_pc`.
CodePtr AddressSpace::ReverseGetEntryPoint(CodePtr host_pc) {
    auto candidate = reverse_block_entries.upper_bound(host_pc);
    if (candidate == reverse_block_entries.begin()) {
        return nullptr;
    }
    // upper_bound yields the first entry strictly above host_pc; step back to the one containing it.
    --candidate;
    return candidate->first;
}
// Returns the entry point for `descriptor`, generating IR and emitting host code for
// it first if no block is registered yet.
CodePtr AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
    CodePtr entry = Get(descriptor);
    if (entry == nullptr) {
        entry = Emit(GenerateIR(descriptor)).entry_point;
    }
    return entry;
}
// Drops the current entry point of every descriptor in `descriptors`.
//
// Only block_entries is updated; the reverse map and block infos keep the old code so
// that addresses inside it can still be resolved. Code memory is made writable for the
// duration because unlinking patches already-emitted blocks.
void AddressSpace::InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& descriptors) {
    UnprotectCodeMemory();

    for (const auto& descriptor : descriptors) {
        if (const auto entry = block_entries.find(descriptor); entry != block_entries.end()) {
            // Unlink before removal because InvalidateBasicBlocks can be called within a fastmem callback,
            // and the currently executing block may have references to itself which need to be unlinked.
            RelinkForDescriptor(descriptor, nullptr);
            block_entries.erase(entry);
        }
    }

    ProtectCodeMemory();
}
// Forgets every emitted block and rewinds the code generator to the end of the prelude,
// so that new code overwrites the old blocks while the prelude itself is kept.
void AddressSpace::ClearCache() {
block_entries.clear();
reverse_block_entries.clear();
block_infos.clear();
block_references.clear();
code.set_offset(prelude_info.end_of_prelude);
}
void AddressSpace::DumpDisassembly() const {
for (u32* ptr = mem.ptr(); ptr < code.xptr<u32*>(); ptr++) {
std::printf("%s", Common::DisassembleAArch64(*ptr, mcl::bit_cast<u64>(ptr)).c_str());
}
}
// Returns how many bytes of the code cache lie beyond the code generator's current offset.
size_t AddressSpace::GetRemainingSize() {
return code_cache_size - static_cast<size_t>(code.offset());
}
// Emits host code for `block`, registers it in all lookup tables, links it against
// already-emitted blocks (and vice versa), and returns its EmittedBlockInfo.
//
// If less than 1 MiB of code cache remains, the whole cache is cleared first.
EmittedBlockInfo AddressSpace::Emit(IR::Block block) {
    if (GetRemainingSize() < 1024 * 1024) {
        ClearCache();
    }

    // Read the location up front: `block` is moved into EmitArm64 below and must not
    // be relied upon for it afterwards.
    const IR::LocationDescriptor location = block.Location();

    UnprotectCodeMemory();

    EmittedBlockInfo block_info = EmitArm64(code, std::move(block), GetEmitConfig(), fastmem_manager);

    // The insertions are performed outside ASSERT so that the bookkeeping does not
    // depend on how the assertion macro treats its argument.
    [[maybe_unused]] const bool entry_inserted = block_entries.insert({location, block_info.entry_point}).second;
    ASSERT(entry_inserted);
    [[maybe_unused]] const bool reverse_inserted = reverse_block_entries.insert({block_info.entry_point, location}).second;
    ASSERT(reverse_inserted);
    [[maybe_unused]] const bool info_inserted = block_infos.insert({block_info.entry_point, block_info}).second;
    ASSERT(info_inserted);

    Link(block_info);
    RelinkForDescriptor(location, block_info.entry_point);

    // Flush the freshly written range so the new instructions are visible for execution.
    mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
    ProtectCodeMemory();

    // NOTE(review): `block` is in a moved-from state here; this call is kept as in the
    // original and presumably only reads members that survive the move - confirm.
    RegisterNewBasicBlock(block, block_info);

    return block_info;
}
// Resolves the relocations recorded while emitting `block_info`.
//
// First pass: every placeholder recorded in `relocations` is overwritten with a branch
// (B for the two return targets, BL for everything else) to the matching prelude routine.
// Second pass: for every block this one links to, the block is recorded as a referrer
// in block_references and its block-link placeholders are patched to the target's
// current entry point (or to the unlinked form if the target has not been emitted).
void AddressSpace::Link(EmittedBlockInfo& block_info) {
    using namespace oaknut;
    using namespace oaknut::util;

    for (auto [ptr_offset, target] : block_info.relocations) {
        // Temporary generator positioned on the placeholder instruction to overwrite.
        CodeGenerator c{mem.ptr(), mem.ptr()};
        c.set_xptr(reinterpret_cast<u32*>(block_info.entry_point + ptr_offset));

        switch (target) {
        case LinkTarget::ReturnToDispatcher:
            c.B(prelude_info.return_to_dispatcher);
            break;
        case LinkTarget::ReturnFromRunCode:
            c.B(prelude_info.return_from_run_code);
            break;
        case LinkTarget::ReadMemory8:
            c.BL(prelude_info.read_memory_8);
            break;
        case LinkTarget::ReadMemory16:
            c.BL(prelude_info.read_memory_16);
            break;
        case LinkTarget::ReadMemory32:
            c.BL(prelude_info.read_memory_32);
            break;
        case LinkTarget::ReadMemory64:
            c.BL(prelude_info.read_memory_64);
            break;
        case LinkTarget::ReadMemory128:
            c.BL(prelude_info.read_memory_128);
            break;
        case LinkTarget::WrappedReadMemory8:
            c.BL(prelude_info.wrapped_read_memory_8);
            break;
        case LinkTarget::WrappedReadMemory16:
            c.BL(prelude_info.wrapped_read_memory_16);
            break;
        case LinkTarget::WrappedReadMemory32:
            c.BL(prelude_info.wrapped_read_memory_32);
            break;
        case LinkTarget::WrappedReadMemory64:
            c.BL(prelude_info.wrapped_read_memory_64);
            break;
        case LinkTarget::WrappedReadMemory128:
            c.BL(prelude_info.wrapped_read_memory_128);
            break;
        case LinkTarget::ExclusiveReadMemory8:
            c.BL(prelude_info.exclusive_read_memory_8);
            break;
        case LinkTarget::ExclusiveReadMemory16:
            c.BL(prelude_info.exclusive_read_memory_16);
            break;
        case LinkTarget::ExclusiveReadMemory32:
            c.BL(prelude_info.exclusive_read_memory_32);
            break;
        case LinkTarget::ExclusiveReadMemory64:
            c.BL(prelude_info.exclusive_read_memory_64);
            break;
        case LinkTarget::ExclusiveReadMemory128:
            c.BL(prelude_info.exclusive_read_memory_128);
            break;
        case LinkTarget::WriteMemory8:
            c.BL(prelude_info.write_memory_8);
            break;
        case LinkTarget::WriteMemory16:
            c.BL(prelude_info.write_memory_16);
            break;
        case LinkTarget::WriteMemory32:
            c.BL(prelude_info.write_memory_32);
            break;
        case LinkTarget::WriteMemory64:
            c.BL(prelude_info.write_memory_64);
            break;
        case LinkTarget::WriteMemory128:
            c.BL(prelude_info.write_memory_128);
            break;
        case LinkTarget::WrappedWriteMemory8:
            c.BL(prelude_info.wrapped_write_memory_8);
            break;
        case LinkTarget::WrappedWriteMemory16:
            c.BL(prelude_info.wrapped_write_memory_16);
            break;
        case LinkTarget::WrappedWriteMemory32:
            c.BL(prelude_info.wrapped_write_memory_32);
            break;
        case LinkTarget::WrappedWriteMemory64:
            c.BL(prelude_info.wrapped_write_memory_64);
            break;
        case LinkTarget::WrappedWriteMemory128:
            c.BL(prelude_info.wrapped_write_memory_128);
            break;
        case LinkTarget::ExclusiveWriteMemory8:
            c.BL(prelude_info.exclusive_write_memory_8);
            break;
        case LinkTarget::ExclusiveWriteMemory16:
            c.BL(prelude_info.exclusive_write_memory_16);
            break;
        case LinkTarget::ExclusiveWriteMemory32:
            c.BL(prelude_info.exclusive_write_memory_32);
            break;
        case LinkTarget::ExclusiveWriteMemory64:
            c.BL(prelude_info.exclusive_write_memory_64);
            break;
        case LinkTarget::ExclusiveWriteMemory128:
            c.BL(prelude_info.exclusive_write_memory_128);
            break;
        case LinkTarget::CallSVC:
            c.BL(prelude_info.call_svc);
            break;
        case LinkTarget::ExceptionRaised:
            c.BL(prelude_info.exception_raised);
            break;
        case LinkTarget::InstructionSynchronizationBarrierRaised:
            c.BL(prelude_info.isb_raised);
            break;
        case LinkTarget::InstructionCacheOperationRaised:
            c.BL(prelude_info.ic_raised);
            break;
        case LinkTarget::DataCacheOperationRaised:
            c.BL(prelude_info.dc_raised);
            break;
        case LinkTarget::GetCNTPCT:
            c.BL(prelude_info.get_cntpct);
            break;
        case LinkTarget::AddTicks:
            c.BL(prelude_info.add_ticks);
            break;
        case LinkTarget::GetTicksRemaining:
            c.BL(prelude_info.get_ticks_remaining);
            break;
        default:
            ASSERT_FALSE("Invalid relocation target");
        }
    }

    // Bind by const reference: each map entry holds a std::vector that would otherwise
    // be copied on every iteration. Nothing in the loop body modifies block_relocations.
    for (const auto& [target_descriptor, list] : block_info.block_relocations) {
        block_references[target_descriptor].insert(block_info.entry_point);
        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
    }
}
void AddressSpace::LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
using namespace oaknut;
using namespace oaknut::util;
for (auto [ptr_offset, type] : block_relocations_list) {
CodeGenerator c{mem.ptr(), mem.ptr()};
c.set_xptr(reinterpret_cast<u32*>(entry_point + ptr_offset));
switch (type) {
case BlockRelocationType::Branch:
if (target_ptr) {
c.B((void*)target_ptr);
} else {
c.NOP();
}
break;
case BlockRelocationType::MoveToScratch1:
if (target_ptr) {
c.ADRL(Xscratch1, (void*)target_ptr);
} else {
c.ADRL(Xscratch1, prelude_info.return_to_dispatcher);
}
break;
default:
ASSERT_FALSE("Invalid BlockRelocationType");
}
}
}
// Re-patches every block that links to `target_descriptor` so that its block links
// point at `target_ptr` (or are unlinked when `target_ptr` is null).
//
// The referrer set is looked up with find() rather than operator[] so that querying a
// descriptor nobody links to does not leave an empty set behind in block_references.
void AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor, CodePtr target_ptr) {
    const auto references = block_references.find(target_descriptor);
    if (references == block_references.end()) {
        return;
    }

    for (const CodePtr code_ptr : references->second) {
        const auto block_iter = block_infos.find(code_ptr);
        if (block_iter == block_infos.end()) {
            continue;
        }

        const EmittedBlockInfo& block_info = block_iter->second;
        if (auto relocation_iter = block_info.block_relocations.find(target_descriptor); relocation_iter != block_info.block_relocations.end()) {
            LinkBlockLinks(block_info.entry_point, target_ptr, relocation_iter->second);
        }

        // Flush the referring block's range after patching it.
        mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
    }
}
// Fastmem callback installed on the exception handler by the constructor.
//
// Resolves `host_pc` to the emitted block containing it and to the fastmem patch
// entry recorded for that exact offset, and returns the FakeCall stored there. If the
// patch entry asks for recompilation, the access is marked as do-not-fastmem and the
// owning block is invalidated first.
// Any lookup failure is reported and ends in an assertion failure.
FakeCall AddressSpace::FastmemCallback(u64 host_pc) {
// The extra scope keeps the locals below out of scope at `fail:`, so the gotos do not
// jump across their initializations.
{
const auto host_ptr = mcl::bit_cast<CodePtr>(host_pc);
const auto entry_point = ReverseGetEntryPoint(host_ptr);
if (!entry_point) {
goto fail;
}
const auto block_info = block_infos.find(entry_point);
if (block_info == block_infos.end()) {
goto fail;
}
// Patch entries are keyed by the offset of the faulting instruction within its block.
const auto patch_entry = block_info->second.fastmem_patch_info.find(host_ptr - entry_point);
if (patch_entry == block_info->second.fastmem_patch_info.end()) {
goto fail;
}
// Copied before the invalidation below.
const auto fc = patch_entry->second.fc;
if (patch_entry->second.recompile) {
const auto marker = patch_entry->second.marker;
fastmem_manager.MarkDoNotFastmem(marker);
InvalidateBasicBlocks({std::get<0>(marker)});
}
return fc;
}
fail:
fmt::print("dynarmic: Segfault happened within JITted code at host_pc = {:016x}\n", host_pc);
fmt::print("Segfault wasn't at a fastmem patch location!\n");
// NOTE(review): no return follows; this relies on ASSERT_FALSE never returning - confirm.
ASSERT_FALSE("segfault");
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,140 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <map>
#include <optional>
#include "dynarmic/common/common_types.h"
#include <oaknut/code_block.hpp>
#include <oaknut/oaknut.hpp>
#include <ankerl/unordered_dense.h>
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/fastmem.h"
#include "dynarmic/interface/halt_reason.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::Backend::Arm64 {
// Owns a host code cache and the bookkeeping that maps guest location descriptors to
// emitted host code. Derived classes supply IR generation, the emit configuration and
// a hook that is called for each newly emitted block.
class AddressSpace {
public:
// Allocates a code cache of `code_cache_size` bytes (at most 128 MiB).
explicit AddressSpace(size_t code_cache_size);
virtual ~AddressSpace();
// Produces the IR block for the given guest location.
virtual IR::Block GenerateIR(IR::LocationDescriptor) const = 0;
// Returns the current entry point for `descriptor`, or nullptr if none is registered.
CodePtr Get(IR::LocationDescriptor descriptor);
// Returns "most likely" LocationDescriptor associated with the emitted code at that location
std::optional<IR::LocationDescriptor> ReverseGetLocation(CodePtr host_pc);
// Returns "most likely" entry_point associated with the emitted code at that location
CodePtr ReverseGetEntryPoint(CodePtr host_pc);
// Returns the entry point for `descriptor`, emitting the block first if necessary.
CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
// Unlinks and unregisters the current entry points of the given descriptors.
void InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& descriptors);
// Forgets all emitted blocks and rewinds code emission to the end of the prelude.
void ClearCache();
// Prints a disassembly of the code cache contents to stdout.
void DumpDisassembly() const;
protected:
virtual EmitConfig GetEmitConfig() = 0;
// Called by Emit after a new block has been emitted and linked.
virtual void RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo& block_info) = 0;
// Protection of the code memory is only toggled on the configurations listed in the
// #if conditions; everywhere else these two functions are no-ops.
void ProtectCodeMemory() {
#if defined(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT) || defined(__APPLE__) || defined(__OpenBSD__)
mem.protect();
#endif
}
void UnprotectCodeMemory() {
#if defined(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT) || defined(__APPLE__) || defined(__OpenBSD__)
mem.unprotect();
#endif
}
// Bytes of code cache left beyond the current emission offset.
size_t GetRemainingSize();
// Emits, registers and links `ir_block`.
EmittedBlockInfo Emit(IR::Block ir_block);
// Resolves the relocations recorded for a freshly emitted block.
void Link(EmittedBlockInfo& block);
// Patches the given block-link placeholders of the block at `entry_point` to `target_ptr` (null unlinks).
void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list);
// Re-patches every block that links to `target_descriptor`.
void RelinkForDescriptor(IR::LocationDescriptor target_descriptor, CodePtr target_ptr);
// Exception-handler callback for faults inside emitted code.
FakeCall FastmemCallback(u64 host_pc);
// Declaration order matters: the constructor's initializer list initializes `code`
// from `mem` and `fastmem_manager` from `exception_handler`.
const size_t code_cache_size;
oaknut::CodeBlock mem;
oaknut::CodeGenerator code;
// A IR::LocationDescriptor will have one current CodePtr.
// However, there can be multiple other CodePtrs which are older, previously invalidated blocks.
std::map<CodePtr, IR::LocationDescriptor> reverse_block_entries;
ankerl::unordered_dense::map<IR::LocationDescriptor, CodePtr> block_entries;
ankerl::unordered_dense::map<CodePtr, EmittedBlockInfo> block_infos;
// For each descriptor, the entry points of the blocks that contain links to it.
ankerl::unordered_dense::map<IR::LocationDescriptor, ankerl::unordered_dense::set<CodePtr>> block_references;
ExceptionHandler exception_handler;
FastmemManager fastmem_manager;
// Addresses of the routines in the prelude at the start of the code cache. Link()
// resolves each LinkTarget to the matching pointer below.
struct PreludeInfo {
// Code offset at which the prelude ends; ClearCache rewinds emission to this offset.
std::ptrdiff_t end_of_prelude;
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, void* jit_state, volatile u32* halt_reason);
RunCodeFuncType run_code;
RunCodeFuncType step_code;
void* return_to_dispatcher;
void* return_from_run_code;
void* read_memory_8;
void* read_memory_16;
void* read_memory_32;
void* read_memory_64;
void* read_memory_128;
void* wrapped_read_memory_8;
void* wrapped_read_memory_16;
void* wrapped_read_memory_32;
void* wrapped_read_memory_64;
void* wrapped_read_memory_128;
void* exclusive_read_memory_8;
void* exclusive_read_memory_16;
void* exclusive_read_memory_32;
void* exclusive_read_memory_64;
void* exclusive_read_memory_128;
void* write_memory_8;
void* write_memory_16;
void* write_memory_32;
void* write_memory_64;
void* write_memory_128;
void* wrapped_write_memory_8;
void* wrapped_write_memory_16;
void* wrapped_write_memory_32;
void* wrapped_write_memory_64;
void* wrapped_write_memory_128;
void* exclusive_write_memory_8;
void* exclusive_write_memory_16;
void* exclusive_write_memory_32;
void* exclusive_write_memory_64;
void* exclusive_write_memory_128;
void* call_svc;
void* exception_raised;
void* dc_raised;
void* ic_raised;
void* isb_raised;
void* get_cntpct;
void* add_ticks;
void* get_ticks_remaining;
} prelude_info;
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <mcl/bit_cast.hpp>
#include "dynarmic/common/common_types.h"
#include <mcl/type_traits/function_info.hpp>
namespace Dynarmic::Backend::Arm64 {
// A member-function call resolved to plain addresses: the function to call and the
// `this` value to pass to it, both as 64-bit integers.
struct DevirtualizedCall {
u64 fn_ptr;
u64 this_ptr;
};
// https://rants.vastheman.com/2021/09/21/msvc/
// Variant used for MSVC on Windows (see Devirtualize): requires the member function
// pointer to be exactly 8 bytes and uses its bits directly as the function address,
// with `this_` passed through unadjusted.
template<auto mfp>
DevirtualizedCall DevirtualizeWindows(mcl::class_type<decltype(mfp)>* this_) {
static_assert(sizeof(mfp) == 8);
return DevirtualizedCall{mcl::bit_cast<u64>(mfp), reinterpret_cast<u64>(this_)};
}
// https://github.com/ARM-software/abi-aa/blob/main/cppabi64/cppabi64.rst#representation-of-pointer-to-member-function
// Resolves member function pointer `mfp` on object `this_` to a concrete function
// address and adjusted `this` pointer, using the 16-byte {ptr, adj} representation
// described at the link above.
template<auto mfp>
DevirtualizedCall DevirtualizeDefault(mcl::class_type<decltype(mfp)>* this_) {
    struct MemberFunctionPointer {
        // Address of non-virtual function or index into vtable.
        u64 ptr;
        // LSB is discriminator for if function is virtual. Other bits are this adjustment.
        u64 adj;
    };
    static_assert(sizeof(MemberFunctionPointer) == 16);
    static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));

    const auto raw = mcl::bit_cast<MemberFunctionPointer>(mfp);
    const bool is_virtual = (raw.adj & 1) != 0;
    const u64 adjusted_this = mcl::bit_cast<u64>(this_) + (raw.adj >> 1);

    if (!is_virtual) {
        return DevirtualizedCall{raw.ptr, adjusted_this};
    }

    // Virtual: the object's first word is the vtable address and raw.ptr is the offset of the slot in it.
    const u64 vtable = mcl::bit_cast_pointee<u64>(adjusted_this);
    const u64 resolved_fn = mcl::bit_cast_pointee<u64>(vtable + raw.ptr);
    return DevirtualizedCall{resolved_fn, adjusted_this};
}
// Resolves member function pointer `mfp` on `this_` to a direct call target, picking
// the MSVC/Windows representation or the default one at compile time.
template<auto mfp>
DevirtualizedCall Devirtualize(mcl::class_type<decltype(mfp)>* this_) {
#if defined(_WIN32) && defined(_MSC_VER)
return DevirtualizeWindows<mfp>(this_);
#else
return DevirtualizeDefault<mfp>(this_);
#endif
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,290 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/emit_arm64.h"
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/backend/arm64/verbose_debugging_output.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Void: emits nothing.
template<>
void EmitIR<IR::Opcode::Void>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {}
// Identity: emits no code; the result is defined as an alias of the first argument.
template<>
void EmitIR<IR::Opcode::Identity>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
}
// Breakpoint: emits a BRK #0 instruction.
template<>
void EmitIR<IR::Opcode::Breakpoint>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
code.BRK(0);
}
// CallHostFunction: argument 0 is the immediate address of the host function and
// arguments 1-3 are its parameters. The register allocator prepares the call, then
// the address is loaded into Xscratch0 and called with BLR.
template<>
void EmitIR<IR::Opcode::CallHostFunction>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.PrepareForCall(args[1], args[2], args[3]);
code.MOV(Xscratch0, args[0].GetImmediateU64());
code.BLR(Xscratch0);
}
// PushRSB: records the immediate target location in the return stack buffer kept in
// the StackLayout on the host stack. Emits nothing when the ReturnStackBuffer
// optimization is disabled.
template<>
void EmitIR<IR::Opcode::PushRSB>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
if (!ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
const IR::LocationDescriptor target{args[0].GetImmediateU64()};
// Advance rsb_ptr by one entry, wrapping with RSBIndexMask, and store it back.
code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
code.ADD(Wscratch2, Wscratch2, sizeof(RSBEntry));
code.AND(Wscratch2, Wscratch2, RSBIndexMask);
code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
// Xscratch2 = SP + new rsb_ptr; the rsb member offset is applied by the STP below.
code.ADD(Xscratch2, SP, Xscratch2);
// Entry contents: the target descriptor value in Xscratch0, and Xscratch1 as filled
// in by the MoveToScratch1 block-link relocation (patched later by LinkBlockLinks).
code.MOV(Xscratch0, target.Value());
EmitBlockLinkRelocation(code, ctx, target, BlockRelocationType::MoveToScratch1);
code.STP(Xscratch0, Xscratch1, Xscratch2, offsetof(StackLayout, rsb));
}
// GetCarryFromOp / GetOverflowFromOp / GetGEFromOp emit no code of their own: the
// value must already have been defined, which is checked by assertion.
template<>
void EmitIR<IR::Opcode::GetCarryFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
}
template<>
void EmitIR<IR::Opcode::GetOverflowFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
}
template<>
void EmitIR<IR::Opcode::GetGEFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
}
// GetNZCVFromOp: if the flags were not already defined, derives them by emitting a
// TST of the 32- or 64-bit operand against itself.
template<>
void EmitIR<IR::Opcode::GetNZCVFromOp>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (ctx.reg_alloc.WasValueDefined(inst)) {
        return;
    }

    const auto operand_type = args[0].GetType();
    if (operand_type == IR::Type::U32) {
        auto Woperand = ctx.reg_alloc.ReadW(args[0]);
        auto nzcv = ctx.reg_alloc.WriteFlags(inst);
        RegAlloc::Realize(Woperand, nzcv);

        code.TST(*Woperand, *Woperand);
    } else if (operand_type == IR::Type::U64) {
        auto Xoperand = ctx.reg_alloc.ReadX(args[0]);
        auto nzcv = ctx.reg_alloc.WriteFlags(inst);
        RegAlloc::Realize(Xoperand, nzcv);

        code.TST(*Xoperand, *Xoperand);
    } else {
        ASSERT_FALSE("Invalid type for GetNZCVFromOp");
    }
}
// GetNZFromOp: if the flags were not already defined, derives them by emitting a
// TST of the 32- or 64-bit operand against itself.
template<>
void EmitIR<IR::Opcode::GetNZFromOp>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (ctx.reg_alloc.WasValueDefined(inst)) {
        return;
    }

    const auto operand_type = args[0].GetType();
    if (operand_type == IR::Type::U32) {
        auto Woperand = ctx.reg_alloc.ReadW(args[0]);
        auto nz = ctx.reg_alloc.WriteFlags(inst);
        RegAlloc::Realize(Woperand, nz);

        code.TST(*Woperand, *Woperand);
    } else if (operand_type == IR::Type::U64) {
        auto Xoperand = ctx.reg_alloc.ReadX(args[0]);
        auto nz = ctx.reg_alloc.WriteFlags(inst);
        RegAlloc::Realize(Xoperand, nz);

        code.TST(*Xoperand, *Xoperand);
    } else {
        ASSERT_FALSE("Invalid type for GetNZFromOp");
    }
}
// GetUpperFromOp / GetLowerFromOp emit no code of their own: the value must already
// have been defined, which is checked by assertion.
template<>
void EmitIR<IR::Opcode::GetUpperFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
}
template<>
void EmitIR<IR::Opcode::GetLowerFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
}
// GetCFlagFromNZCV: masks the NZCV operand with bit 29. The result keeps the bit in
// position 29; it is not shifted down to bit 0.
template<>
void EmitIR<IR::Opcode::GetCFlagFromNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wc = ctx.reg_alloc.WriteW(inst);
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wc, Wnzcv);
code.AND(Wc, Wnzcv, 1 << 29);
}
// NZCVFromPackedFlags: emits no code; the result is defined as an alias of the argument.
template<>
void EmitIR<IR::Opcode::NZCVFromPackedFlags>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
}
// Emits code that subtracts `cycles_to_add` from the tick counter in Xticks.
//
// Nothing is emitted when cycle counting is disabled or the count is zero. Counts
// that do not fit an add/sub immediate are first loaded into Xscratch1.
static void EmitAddCycles(oaknut::CodeGenerator& code, EmitContext& ctx, size_t cycles_to_add) {
    const bool nothing_to_emit = !ctx.conf.enable_cycle_counting || cycles_to_add == 0;
    if (nothing_to_emit) {
        return;
    }

    if (!oaknut::AddSubImm::is_valid(cycles_to_add)) {
        code.MOV(Xscratch1, cycles_to_add);
        code.SUB(Xticks, Xticks, Xscratch1);
        return;
    }

    code.SUB(Xticks, Xticks, cycles_to_add);
}
// Emits host code for one IR block at the code generator's current position and
// returns the block's entry point, size, and the relocations recorded along the way.
//
// `block` is taken by value; `conf` supplies the frontend-specific emit callbacks.
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& conf, FastmemManager& fastmem_manager) {
if (conf.very_verbose_debugging_output) {
std::puts(IR::DumpBlock(block).c_str());
}
EmittedBlockInfo ebi;
FpsrManager fpsr_manager{code, conf.state_fpsr_offset};
RegAlloc reg_alloc{code, fpsr_manager, GPR_ORDER, FPR_ORDER};
EmitContext ctx{block, reg_alloc, conf, ebi, fpsr_manager, fastmem_manager, {}};
ebi.entry_point = code.xptr<CodePtr>();
// Conditional blocks start with a condition check: on failure the condition-failed
// cycle count is charged and the condition-failed terminal is taken; `pass` marks
// where execution continues when the condition holds.
if (ctx.block.GetCondition() == IR::Cond::AL) {
ASSERT(!ctx.block.HasConditionFailedLocation());
} else {
ASSERT(ctx.block.HasConditionFailedLocation());
oaknut::Label pass;
pass = conf.emit_cond(code, ctx, ctx.block.GetCondition());
EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
conf.emit_condition_failed_terminal(code, ctx);
code.l(pass);
}
for (auto iter = block.begin(); iter != block.end(); ++iter) {
IR::Inst* inst = &*iter;
// Dispatch to the EmitIR specialization of the instruction's opcode. The case labels
// are generated from opcodes.inc through the three macros below.
switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \
case IR::Opcode::name: \
EmitIR<IR::Opcode::name>(code, ctx, inst); \
break;
#define A32OPC(name, type, ...) \
case IR::Opcode::A32##name: \
EmitIR<IR::Opcode::A32##name>(code, ctx, inst); \
break;
#define A64OPC(name, type, ...) \
case IR::Opcode::A64##name: \
EmitIR<IR::Opcode::A64##name>(code, ctx, inst); \
break;
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
default:
ASSERT_FALSE("Invalid opcode: {}", inst->GetOpcode());
break;
}
reg_alloc.UpdateAllUses();
reg_alloc.AssertAllUnlocked();
if (conf.very_verbose_debugging_output) {
EmitVerboseDebuggingOutput(code, ctx);
}
}
fpsr_manager.Spill();
reg_alloc.AssertNoMoreUses();
EmitAddCycles(code, ctx, block.CycleCount());
conf.emit_terminal(code, ctx);
// BRK instructions bracket the deferred code that is emitted after the terminal.
code.BRK(0);
for (const auto& deferred_emit : ctx.deferred_emits) {
deferred_emit();
}
code.BRK(0);
ebi.size = code.xptr<CodePtr>() - ebi.entry_point;
return ebi;
}
// Records a relocation to `link_target` at the current offset within the block and
// emits a NOP as the placeholder instruction to be overwritten at link time.
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target) {
ctx.ebi.relocations.emplace_back(Relocation{code.xptr<CodePtr>() - ctx.ebi.entry_point, link_target});
code.NOP();
}
// Records a block-link relocation towards `descriptor` at the current offset within
// the block and emits placeholder instructions for it.
//
// Branch reserves one instruction (NOP). MoveToScratch1 reserves two (BRK, NOP),
// presumably because the ADRL written at link time occupies two instructions - confirm.
void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, const IR::LocationDescriptor& descriptor, BlockRelocationType type) {
ctx.ebi.block_relocations[descriptor].emplace_back(BlockRelocation{code.xptr<CodePtr>() - ctx.ebi.entry_point, type});
switch (type) {
case BlockRelocationType::Branch:
code.NOP();
break;
case BlockRelocationType::MoveToScratch1:
code.BRK(0);
code.NOP();
break;
default:
UNREACHABLE();
}
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,184 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include <cstddef>
#include <memory>
#include <optional>
#include <vector>
#include "dynarmic/common/common_types.h"
#include <ankerl/unordered_dense.h>
#include "dynarmic/backend/arm64/fastmem.h"
#include "dynarmic/interface/A32/coprocessor.h"
#include "dynarmic/interface/optimization_flags.h"
#include "dynarmic/ir/location_descriptor.h"
// Forward declarations of types that this header only mentions in declarations
// (references, pointers, and function return types).
namespace oaknut {
struct CodeGenerator;
struct Label;
} // namespace oaknut
namespace Dynarmic::FP {
class FPCR;
} // namespace Dynarmic::FP
namespace Dynarmic::IR {
class Block;
class Inst;
enum class Cond;
enum class Opcode;
} // namespace Dynarmic::IR
namespace Dynarmic::Backend::Arm64 {
// Defined elsewhere; this header only passes it by reference.
struct EmitContext;
// Byte pointer used for positions in emitted code (see EmittedBlockInfo::entry_point).
using CodePtr = std::byte*;
// Symbolic target that a Relocation refers to (see Relocation::target).
// The enumerators carry no explicit values, so their order determines their values.
enum class LinkTarget {
ReturnToDispatcher,
ReturnFromRunCode,
ReadMemory8,
ReadMemory16,
ReadMemory32,
ReadMemory64,
ReadMemory128,
WrappedReadMemory8,
WrappedReadMemory16,
WrappedReadMemory32,
WrappedReadMemory64,
WrappedReadMemory128,
ExclusiveReadMemory8,
ExclusiveReadMemory16,
ExclusiveReadMemory32,
ExclusiveReadMemory64,
ExclusiveReadMemory128,
WriteMemory8,
WriteMemory16,
WriteMemory32,
WriteMemory64,
WriteMemory128,
WrappedWriteMemory8,
WrappedWriteMemory16,
WrappedWriteMemory32,
WrappedWriteMemory64,
WrappedWriteMemory128,
ExclusiveWriteMemory8,
ExclusiveWriteMemory16,
ExclusiveWriteMemory32,
ExclusiveWriteMemory64,
ExclusiveWriteMemory128,
CallSVC,
ExceptionRaised,
InstructionSynchronizationBarrierRaised,
InstructionCacheOperationRaised,
DataCacheOperationRaised,
GetCNTPCT,
AddTicks,
GetTicksRemaining,
};
// One relocation site inside an emitted block.
struct Relocation {
// Position of the site; EmitRelocation stores it as an offset from the block's entry point.
std::ptrdiff_t code_offset;
// What the site refers to.
LinkTarget target;
};
// Kind of block-link relocation site. EmitBlockLinkRelocation emits a different
// placeholder for each kind: one NOP for Branch, BRK followed by NOP for MoveToScratch1.
enum class BlockRelocationType {
Branch,
MoveToScratch1,
};
// One block-link relocation site inside an emitted block.
struct BlockRelocation {
// Position of the site; EmitBlockLinkRelocation stores it as an offset from the block's entry point.
std::ptrdiff_t code_offset;
// Which placeholder was emitted at the site.
BlockRelocationType type;
};
// Result of emitting one block; returned by EmitArm64.
struct EmittedBlockInfo {
// Start of the emitted code for the block.
CodePtr entry_point;
// Byte length of the emitted code; EmitArm64 sets it to (end position - entry_point).
size_t size;
// Sites recorded by EmitRelocation.
std::vector<Relocation> relocations;
// Sites recorded by EmitBlockLinkRelocation, grouped by the location descriptor they refer to.
ankerl::unordered_dense::map<IR::LocationDescriptor, std::vector<BlockRelocation>> block_relocations;
// NOTE(review): presumably keyed by code offset like the relocations above — confirm against the fastmem emitter.
ankerl::unordered_dense::map<std::ptrdiff_t, FastmemPatchInfo> fastmem_patch_info;
};
// Settings consulted while emitting a block; EmitArm64 receives it by const reference.
// The section comments below group related fields. Only fields whose use is visible
// in the arm64 emitters are described individually.
struct EmitConfig {
OptimizationFlag optimizations;
// True when `f` and `optimizations` have at least one flag in common
// (assumes no_optimizations is the empty flag value — TODO confirm).
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
// When false, the A32 instruction-synchronization-barrier emitter produces no code.
bool hook_isb;
// System registers
u64 cntfreq_el0;
u32 ctr_el0;
u32 dczid_el0;
const u64* tpidrro_el0;
u64* tpidr_el0;
// Memory
// When false, EmitA32CheckMemoryAbort produces no code.
bool check_halt_on_memory_access;
// Page table
u64 page_table_pointer;
size_t page_table_address_space_bits;
int page_table_pointer_mask_bits;
bool silently_mirror_page_table;
bool absolute_offset_page_table;
u8 detect_misaligned_access_via_page_table;
bool only_detect_misalignment_via_page_table_on_page_boundary;
// Fastmem
std::optional<u64> fastmem_pointer;
bool recompile_on_fastmem_failure;
size_t fastmem_address_space_bits;
bool silently_mirror_fastmem;
// Timing
bool wall_clock_cntpct;
// Selects which check the A32 LinkBlock terminal emits before a block link:
// the tick counter when true, the halt word when false.
bool enable_cycle_counting;
// Endianness
// When true, the A32 emitter clears bit 1 of the upper location descriptor value it stores.
bool always_little_endian;
// Frontend specific callbacks
// The EmitA32*/EmitA64* functions declared later in this header have matching signatures.
FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor);
oaknut::Label (*emit_cond)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
void (*emit_condition_failed_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
void (*emit_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
void (*emit_check_memory_abort)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end);
// State offsets
size_t state_nzcv_offset;
size_t state_fpsr_offset;
size_t state_exclusive_state_offset;
// A32 specific
// Indexed by coprocessor number in the A32 coprocessor emitters; an empty slot means "no coprocessor".
std::array<std::shared_ptr<A32::Coprocessor>, 16> coprocessors{};
// Debugging
// When true, EmitArm64 calls EmitVerboseDebuggingOutput after every IR instruction.
bool very_verbose_debugging_output;
};
// Emits host code for `block` through `code` and returns a description of what was emitted.
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf, FastmemManager& fastmem_manager);
// Emitter for a single IR instruction; explicitly specialised per opcode in the backend sources.
template<IR::Opcode op>
void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
// Records a relocation against `link_target` at the current position and emits a NOP there.
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target);
// Records a block-link relocation for `descriptor` at the current position and emits its placeholder.
void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, const IR::LocationDescriptor& descriptor, BlockRelocationType type);
// Frontend-specific emitters; their signatures match the callback members of EmitConfig.
oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);
void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);
void EmitA32CheckMemoryAbort(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end);
void EmitA64CheckMemoryAbort(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end);
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,707 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/bit/bit_field.hpp>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/interface/halt_reason.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Emits a conditional branch on `cond` and returns its (still unbound) target label.
// The guest flags are loaded from cpsr_nzcv into the host NZCV register first.
oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) {
    const auto host_cond = static_cast<oaknut::Cond>(cond);
    oaknut::Label cond_met;

    // TODO: Flags in host flags
    code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
    code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
    code.B(host_cond, cond_met);

    return cond_met;
}
// Forward declaration of the variant dispatcher, so the per-terminal overloads below can recurse into nested terminals.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
// Interpret terminal: not supported by this backend; reaching it trips an assertion.
void EmitA32Terminal(oaknut::CodeGenerator&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) {
ASSERT_FALSE("Interpret should never be emitted.");
}
// ReturnToDispatch terminal: emits a relocation site targeting the dispatcher.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// Stores the upper 32 bits of `new_location`'s hash into upper_location_descriptor, but
// only when they differ from those of `old_location`. Single-stepping is ignored on both
// sides, and bit 1 of the new value is cleared when always_little_endian is configured.
static void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
    const auto upper_half = [](const IR::LocationDescriptor& desc) -> u32 {
        return static_cast<u32>(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32);
    };

    // Bit 1 of the upper half is where A32SetCpsr places the E flag.
    const u32 endian_mask = ~u32(ctx.conf.always_little_endian ? 0x2 : 0);
    const u32 new_upper = upper_half(new_location) & endian_mask;
    const u32 old_upper = upper_half(old_location);

    if (old_upper == new_upper) {
        return;
    }

    code.MOV(Wscratch0, new_upper);
    code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
}
// LinkBlock terminal: when block linking is enabled and we are not single-stepping, emit
// a guarded block-link site for `terminal.next`; after it, emit the fallback that stores
// the next PC and returns to the dispatcher.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location);

    oaknut::Label fallback;
    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        if (ctx.conf.enable_cycle_counting) {
            // Skip the link when Xticks <= 0.
            code.CMP(Xticks, 0);
            code.B(LE, fallback);
        } else {
            // Skip the link when the halt word is non-zero.
            code.LDAR(Wscratch0, Xhalt);
            code.CBNZ(Wscratch0, fallback);
        }
        EmitBlockLinkRelocation(code, ctx, terminal.next, BlockRelocationType::Branch);
    }

    code.l(fallback);
    // regs[15] <- PC of the next block.
    code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC());
    code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// LinkBlockFast terminal: like LinkBlock but the block-link site is emitted without any
// preceding check. The dispatcher fallback follows it.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location);

    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        EmitBlockLinkRelocation(code, ctx, terminal.next, BlockRelocationType::Branch);
    }

    // regs[15] <- PC of the next block.
    code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC());
    code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// PopRSBHint terminal: when the ReturnStackBuffer optimisation is enabled and we are not
// single-stepping, try to jump through the entry selected by rsb_ptr; on a mismatch (or
// when the fast path is not emitted at all) fall through to the dispatcher relocation.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
if (ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
oaknut::Label fail;
// Load rsb_ptr, mask it with RSBIndexMask, and form X2 = SP + masked value.
code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
code.AND(Wscratch2, Wscratch2, RSBIndexMask);
code.ADD(X2, SP, Xscratch2);
// Move rsb_ptr back by one entry and write it back.
code.SUB(Wscratch2, Wscratch2, sizeof(RSBEntry));
code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
// Load the two 64-bit words of the selected entry.
code.LDP(Xscratch0, Xscratch1, X2, offsetof(StackLayout, rsb));
// regs[15] is immediately followed by upper_location_descriptor, so a single
// 64-bit load picks up both.
static_assert(offsetof(A32JitState, regs) + 16 * sizeof(u32) == offsetof(A32JitState, upper_location_descriptor));
code.LDUR(X0, Xstate, offsetof(A32JitState, regs) + 15 * sizeof(u32));
// Jump to the entry's second word only if its first word equals that 64-bit value.
code.CMP(X0, Xscratch0);
code.B(NE, fail);
code.BR(Xscratch1);
code.l(fail);
}
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// FastDispatchHint terminal: currently handled exactly like ReturnToDispatch.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
// TODO: Implement FastDispatchHint optimization
}
// If terminal: emits the `else_` terminal on the fall-through path and the `then_`
// terminal at the label taken when the condition holds.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label condition_holds = EmitA32Cond(code, ctx, terminal.if_);

    // Condition false: fall through.
    EmitA32Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    // Condition true.
    code.l(condition_holds);
    EmitA32Terminal(code, ctx, terminal.then_, initial_location, is_single_step);
}
// CheckBit terminal: reads the check_bit byte from the stack layout and emits `then_`
// for a non-zero value, `else_` for zero.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label bit_is_clear;

    code.LDRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
    code.CBZ(Wscratch0, bit_is_clear);

    // check_bit != 0
    EmitA32Terminal(code, ctx, terminal.then_, initial_location, is_single_step);

    // check_bit == 0
    code.l(bit_is_clear);
    EmitA32Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
}
// CheckHalt terminal: returns to the dispatcher when the halt word is non-zero,
// otherwise continues with the wrapped `else_` terminal.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label halt_requested;

    code.LDAR(Wscratch0, Xhalt);
    code.CBNZ(Wscratch0, halt_requested);

    // No halt pending.
    EmitA32Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    code.l(halt_requested);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// Variant dispatcher: visits `terminal` and forwards to the EmitA32Terminal overload
// for whichever concrete terminal type it currently holds.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
boost::apply_visitor([&](const auto& t) { EmitA32Terminal(code, ctx, t, initial_location, is_single_step); }, terminal);
}
// Entry point: emits the terminal of the block in `ctx`. The single-stepping flag is
// split off the block location and passed separately.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A32::LocationDescriptor block_location{ctx.block.Location()};
    const bool single_stepping = block_location.SingleStepping();

    EmitA32Terminal(code, ctx, ctx.block.GetTerminal(), block_location.SetSingleStepping(false), single_stepping);
}
// Emits a LinkBlock terminal to the block's condition-failed location. The
// single-stepping flag is split off the block location and passed separately.
void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A32::LocationDescriptor block_location{ctx.block.Location()};
    const bool single_stepping = block_location.SingleStepping();

    EmitA32Terminal(code, ctx, IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, block_location.SetSingleStepping(false), single_stepping);
}
// Emits a check of the halt word for HaltReason::MemoryAbort. If the bit is clear,
// control branches to `end`; otherwise the location taken from the instruction's first
// argument is written to guest state and a ReturnFromRunCode relocation is emitted.
// Produces no code when check_halt_on_memory_access is disabled.
void EmitA32CheckMemoryAbort(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end) {
    if (!ctx.conf.check_halt_on_memory_access) {
        return;
    }

    const A32::LocationDescriptor abort_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

    // No abort pending: skip to `end`.
    code.LDAR(Xscratch0, Xhalt);
    code.TST(Xscratch0, static_cast<u32>(HaltReason::MemoryAbort));
    code.B(EQ, end);

    // Abort pending: record the location and leave.
    EmitSetUpperLocationDescriptor(code, ctx, abort_location, ctx.block.Location());
    code.MOV(Wscratch0, abort_location.PC());
    code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
    EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
}
// Stores the argument into the check_bit byte of the stack layout.
template<>
void EmitIR<IR::Opcode::A32SetCheckBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (!args[0].IsImmediate()) {
        // Runtime value: store the low byte of the register.
        auto Wbit = ctx.reg_alloc.ReadW(args[0]);
        RegAlloc::Realize(Wbit);
        code.STRB(Wbit, SP, offsetof(StackLayout, check_bit));
        return;
    }

    if (!args[0].GetImmediateU1()) {
        // Constant false: store the zero register directly.
        code.STRB(WZR, SP, offsetof(StackLayout, check_bit));
        return;
    }

    // Constant true.
    code.MOV(Wscratch0, 1);
    code.STRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
}
// Loads the 32-bit guest register named by argument 0 from A32JitState::regs.
template<>
void EmitIR<IR::Opcode::A32GetRegister>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A32::Reg source_reg = inst->GetArg(0).GetA32RegRef();
    const size_t slot_offset = offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(source_reg);

    auto Wdest = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wdest);

    // TODO: Detect if Gpr vs Fpr is more appropriate
    code.LDR(Wdest, Xstate, slot_offset);
}
// Loads a single-precision extended register from A32JitState::ext_regs.
// Slots are 32 bits wide and counted from S0.
template<>
void EmitIR<IR::Opcode::A32GetExtendedRegister32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A32::ExtReg source_reg = inst->GetArg(0).GetA32ExtRegRef();
    ASSERT(A32::IsSingleExtReg(source_reg));

    const size_t slot = static_cast<size_t>(source_reg) - static_cast<size_t>(A32::ExtReg::S0);
    const size_t slot_offset = offsetof(A32JitState, ext_regs) + sizeof(u32) * slot;

    auto Sdest = ctx.reg_alloc.WriteS(inst);
    RegAlloc::Realize(Sdest);

    // TODO: Detect if Gpr vs Fpr is more appropriate
    code.LDR(Sdest, Xstate, slot_offset);
}
// Loads a double (64-bit) or quad (128-bit) extended register from A32JitState::ext_regs.
// Slots are counted from D0 or Q0 respectively.
template<>
void EmitIR<IR::Opcode::A32GetVector>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A32::ExtReg source_reg = inst->GetArg(0).GetA32ExtRegRef();
    ASSERT(A32::IsDoubleExtReg(source_reg) || A32::IsQuadExtReg(source_reg));

    if (A32::IsDoubleExtReg(source_reg)) {
        const size_t slot = static_cast<size_t>(source_reg) - static_cast<size_t>(A32::ExtReg::D0);
        const size_t slot_offset = offsetof(A32JitState, ext_regs) + sizeof(u64) * slot;

        auto Ddest = ctx.reg_alloc.WriteD(inst);
        RegAlloc::Realize(Ddest);
        code.LDR(Ddest, Xstate, slot_offset);
        return;
    }

    // Quad register: each slot spans two u64s.
    const size_t slot = static_cast<size_t>(source_reg) - static_cast<size_t>(A32::ExtReg::Q0);
    const size_t slot_offset = offsetof(A32JitState, ext_regs) + 2 * sizeof(u64) * slot;

    auto Qdest = ctx.reg_alloc.WriteQ(inst);
    RegAlloc::Realize(Qdest);
    code.LDR(Qdest, Xstate, slot_offset);
}
// Loads a double-precision extended register from A32JitState::ext_regs.
// Slots are two u32s wide and counted from D0.
template<>
void EmitIR<IR::Opcode::A32GetExtendedRegister64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A32::ExtReg source_reg = inst->GetArg(0).GetA32ExtRegRef();
    ASSERT(A32::IsDoubleExtReg(source_reg));

    const size_t slot = static_cast<size_t>(source_reg) - static_cast<size_t>(A32::ExtReg::D0);
    const size_t slot_offset = offsetof(A32JitState, ext_regs) + 2 * sizeof(u32) * slot;

    auto Ddest = ctx.reg_alloc.WriteD(inst);
    RegAlloc::Realize(Ddest);

    // TODO: Detect if Gpr vs Fpr is more appropriate
    code.LDR(Ddest, Xstate, slot_offset);
}
// Stores argument 1 into the 32-bit guest register named by argument 0.
template<>
void EmitIR<IR::Opcode::A32SetRegister>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A32::Reg target_reg = inst->GetArg(0).GetA32RegRef();
    const size_t slot_offset = offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(target_reg);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wsource = ctx.reg_alloc.ReadW(args[1]);
    RegAlloc::Realize(Wsource);

    // TODO: Detect if Gpr vs Fpr is more appropriate
    code.STR(Wsource, Xstate, slot_offset);
}
// Stores argument 1 into a single-precision extended register in A32JitState::ext_regs.
// Slots are 32 bits wide and counted from S0.
template<>
void EmitIR<IR::Opcode::A32SetExtendedRegister32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A32::ExtReg target_reg = inst->GetArg(0).GetA32ExtRegRef();
    ASSERT(A32::IsSingleExtReg(target_reg));

    const size_t slot = static_cast<size_t>(target_reg) - static_cast<size_t>(A32::ExtReg::S0);
    const size_t slot_offset = offsetof(A32JitState, ext_regs) + sizeof(u32) * slot;

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Ssource = ctx.reg_alloc.ReadS(args[1]);
    RegAlloc::Realize(Ssource);

    // TODO: Detect if Gpr vs Fpr is more appropriate
    code.STR(Ssource, Xstate, slot_offset);
}
// Stores argument 1 into a double-precision extended register in A32JitState::ext_regs.
// Slots are two u32s wide and counted from D0.
template<>
void EmitIR<IR::Opcode::A32SetExtendedRegister64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A32::ExtReg target_reg = inst->GetArg(0).GetA32ExtRegRef();
    ASSERT(A32::IsDoubleExtReg(target_reg));

    const size_t slot = static_cast<size_t>(target_reg) - static_cast<size_t>(A32::ExtReg::D0);
    const size_t slot_offset = offsetof(A32JitState, ext_regs) + 2 * sizeof(u32) * slot;

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Dsource = ctx.reg_alloc.ReadD(args[1]);
    RegAlloc::Realize(Dsource);

    // TODO: Detect if Gpr vs Fpr is more appropriate
    code.STR(Dsource, Xstate, slot_offset);
}
// Stores argument 1 into a double (64-bit) or quad (128-bit) extended register in
// A32JitState::ext_regs. Slots are counted from D0 or Q0 respectively.
template<>
void EmitIR<IR::Opcode::A32SetVector>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A32::ExtReg target_reg = inst->GetArg(0).GetA32ExtRegRef();
    ASSERT(A32::IsDoubleExtReg(target_reg) || A32::IsQuadExtReg(target_reg));

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (A32::IsDoubleExtReg(target_reg)) {
        const size_t slot = static_cast<size_t>(target_reg) - static_cast<size_t>(A32::ExtReg::D0);
        const size_t slot_offset = offsetof(A32JitState, ext_regs) + sizeof(u64) * slot;

        auto Dsource = ctx.reg_alloc.ReadD(args[1]);
        RegAlloc::Realize(Dsource);
        code.STR(Dsource, Xstate, slot_offset);
        return;
    }

    // Quad register: each slot spans two u64s.
    const size_t slot = static_cast<size_t>(target_reg) - static_cast<size_t>(A32::ExtReg::Q0);
    const size_t slot_offset = offsetof(A32JitState, ext_regs) + 2 * sizeof(u64) * slot;

    auto Qsource = ctx.reg_alloc.ReadQ(args[1]);
    RegAlloc::Realize(Qsource);
    code.STR(Qsource, Xstate, slot_offset);
}
// Reassembles a 32-bit CPSR value from the separately stored pieces of A32JitState:
// cpsr_nzcv, cpsr_q, cpsr_jaifm, cpsr_ge and the low two bits of upper_location_descriptor.
template<>
void EmitIR<IR::Opcode::A32GetCpsr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto Wcpsr = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wcpsr);
// cpsr_nzcv and cpsr_q are adjacent, so one LDP reads both.
static_assert(offsetof(A32JitState, cpsr_nzcv) + sizeof(u32) == offsetof(A32JitState, cpsr_q));
code.LDP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, cpsr_nzcv));
// Start from cpsr_jaifm and OR in the NZCV and Q words unchanged.
code.LDR(Wcpsr, Xstate, offsetof(A32JitState, cpsr_jaifm));
code.ORR(Wcpsr, Wcpsr, Wscratch0);
code.ORR(Wcpsr, Wcpsr, Wscratch1);
// GE: keep the top bit of each byte of cpsr_ge; the multiply gathers those four bits
// into bits 31:28 (byte 0 -> bit 28 ... byte 3 -> bit 31), and the shifted ORR places
// them at bits 19:16 of the result.
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_ge));
code.AND(Wscratch0, Wscratch0, 0x80808080);
code.MOV(Wscratch1, 0x00204081);
code.MUL(Wscratch0, Wscratch0, Wscratch1);
code.AND(Wscratch0, Wscratch0, 0xf0000000);
code.ORR(Wcpsr, Wcpsr, Wscratch0, LSR, 12);
// E and T: bits 1 and 0 of upper_location_descriptor go to result bits 9 and 5.
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.AND(Wscratch0, Wscratch0, 0b11);
// Result bit:  9 8 7 6 5
//              E . . . T
// After the self-OR shifted by 3 and the mask, bit 0 holds T and bit 4 holds E;
// the final shift by 5 moves them to bits 5 and 9.
code.ORR(Wscratch0, Wscratch0, Wscratch0, LSL, 3);
code.AND(Wscratch0, Wscratch0, 0x11111111);
code.ORR(Wcpsr, Wcpsr, Wscratch0, LSL, 5);
}
// Splits a 32-bit CPSR value (argument 0) into the separately stored pieces of
// A32JitState: cpsr_nzcv, cpsr_q, cpsr_jaifm, cpsr_ge and the low 16 bits of
// upper_location_descriptor (whose high 16 bits are preserved).
template<>
void EmitIR<IR::Opcode::A32SetCpsr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wcpsr = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wcpsr);
// NZCV, Q flags
// Bits 31:28 go to cpsr_nzcv and bit 27 to cpsr_q; the two fields are adjacent, so one STP writes both.
code.AND(Wscratch0, Wcpsr, 0xF0000000);
code.AND(Wscratch1, Wcpsr, 1 << 27);
static_assert(offsetof(A32JitState, cpsr_nzcv) + sizeof(u32) == offsetof(A32JitState, cpsr_q));
code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, cpsr_nzcv));
// GE flags
// this computes the value that is stored to cpsr_ge further down:
// cpsr_ge |= mcl::bit::get_bit<19>(cpsr) ? 0xFF000000 : 0;
// cpsr_ge |= mcl::bit::get_bit<18>(cpsr) ? 0x00FF0000 : 0;
// cpsr_ge |= mcl::bit::get_bit<17>(cpsr) ? 0x0000FF00 : 0;
// cpsr_ge |= mcl::bit::get_bit<16>(cpsr) ? 0x000000FF : 0;
// UBFX extracts bits 19:16; the multiply and mask move each of those bits to bit 0 of
// its own byte; (x << 8) - x then turns every 0x01 byte into 0xFF.
code.UBFX(Wscratch0, Wcpsr, 16, 4);
code.MOV(Wscratch1, 0x00204081);
code.MUL(Wscratch0, Wscratch0, Wscratch1);
code.AND(Wscratch0, Wscratch0, 0x01010101);
code.LSL(Wscratch1, Wscratch0, 8);
code.SUB(Wscratch0, Wscratch1, Wscratch0);
// Other flags
// Mask 0x010001DF keeps bits 24, 8:6 and 4:0. cpsr_jaifm and cpsr_ge are adjacent,
// so one STP writes the masked value and the GE value computed above.
code.MOV(Wscratch1, 0x010001DF);
code.AND(Wscratch1, Wcpsr, Wscratch1);
static_assert(offsetof(A32JitState, cpsr_jaifm) + sizeof(u32) == offsetof(A32JitState, cpsr_ge));
code.STP(Wscratch1, Wscratch0, Xstate, offsetof(A32JitState, cpsr_jaifm));
// IT state
// Bits 15:10 stay in place; bits 26:25 move down to bits 9:8.
code.AND(Wscratch0, Wcpsr, 0xFC00);
code.LSR(Wscratch1, Wcpsr, 17);
code.AND(Wscratch1, Wscratch1, 0x300);
code.ORR(Wscratch0, Wscratch0, Wscratch1);
// E flag, T flag
// E (bit 9) moves to bit 1; T (bit 5) is inserted at bit 0 by BFXIL.
code.LSR(Wscratch1, Wcpsr, 8);
code.AND(Wscratch1, Wscratch1, 0x2);
code.ORR(Wscratch0, Wscratch0, Wscratch1);
code.LDR(Wscratch1, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.BFXIL(Wscratch0, Wcpsr, 5, 1);
// Keep the high 16 bits of the existing upper_location_descriptor and replace the low 16.
code.AND(Wscratch1, Wscratch1, 0xFFFF0000);
code.ORR(Wscratch0, Wscratch0, Wscratch1);
code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
}
// Stores argument 0 unchanged into cpsr_nzcv.
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t nzcv_offset = offsetof(A32JitState, cpsr_nzcv);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wflags = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wflags);

    code.STR(Wflags, Xstate, nzcv_offset);
}
// Stores argument 0 unchanged into cpsr_nzcv (same code as A32SetCpsrNZCV in this backend).
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t nzcv_offset = offsetof(A32JitState, cpsr_nzcv);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wflags = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wflags);

    code.STR(Wflags, Xstate, nzcv_offset);
}
// Splits argument 0 into bits 31:28 (stored to cpsr_nzcv) and bit 27 (stored to cpsr_q).
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZCVQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // The two fields are adjacent, which is what allows the single STP below.
    static_assert(offsetof(A32JitState, cpsr_nzcv) + sizeof(u32) == offsetof(A32JitState, cpsr_q));

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wflags = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wflags);

    code.AND(Wscratch0, Wflags, 0xf000'0000);
    code.AND(Wscratch1, Wflags, 0x0800'0000);
    code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, cpsr_nzcv));
}
// Replaces the top two bits of cpsr_nzcv with argument 0, keeping bits 29:28 of the
// stored value (mask 0x30000000) and dropping everything else.
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t nzcv_offset = offsetof(A32JitState, cpsr_nzcv);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wnew_nz = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wnew_nz);

    // TODO: Track latent value
    code.LDR(Wscratch0, Xstate, nzcv_offset);
    code.AND(Wscratch0, Wscratch0, 0x30000000);
    code.ORR(Wscratch0, Wscratch0, Wnew_nz);
    code.STR(Wscratch0, Xstate, nzcv_offset);
}
// Rewrites cpsr_nzcv from an NZ argument (0) and a carry argument (1), keeping only
// bit 28 of the previously stored value. Each argument may be an immediate or a
// register, giving four code shapes. An immediate NZ argument contributes nothing;
// an immediate carry contributes bit 29 when true.
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const bool nz_is_immediate = args[0].IsImmediate();
    const bool carry_is_immediate = args[1].IsImmediate();

    // Wscratch0 <- cpsr_nzcv with everything but bit 28 cleared.
    const auto load_keeping_bit28 = [&] {
        code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
        code.AND(Wscratch0, Wscratch0, 0x10000000);
    };
    // ORs a constant carry into Wscratch0; nothing is emitted for a zero carry.
    const auto or_in_constant_carry = [&](const u32 carry) {
        if (carry) {
            code.ORR(Wscratch0, Wscratch0, carry);
        }
    };
    const auto store_back = [&] {
        code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
    };

    // TODO: Track latent value
    if (nz_is_immediate && carry_is_immediate) {
        const u32 carry = args[1].GetImmediateU1() ? 0x2000'0000 : 0;
        load_keeping_bit28();
        or_in_constant_carry(carry);
        store_back();
        return;
    }

    if (nz_is_immediate) {
        auto Wc = ctx.reg_alloc.ReadW(args[1]);
        RegAlloc::Realize(Wc);
        load_keeping_bit28();
        code.ORR(Wscratch0, Wscratch0, Wc);
        store_back();
        return;
    }

    if (carry_is_immediate) {
        const u32 carry = args[1].GetImmediateU1() ? 0x2000'0000 : 0;
        auto Wnz = ctx.reg_alloc.ReadW(args[0]);
        RegAlloc::Realize(Wnz);
        load_keeping_bit28();
        code.ORR(Wscratch0, Wscratch0, Wnz);
        or_in_constant_carry(carry);
        store_back();
        return;
    }

    auto Wnz = ctx.reg_alloc.ReadW(args[0]);
    auto Wc = ctx.reg_alloc.ReadW(args[1]);
    RegAlloc::Realize(Wnz, Wc);
    load_keeping_bit28();
    code.ORR(Wscratch0, Wscratch0, Wnz);
    code.ORR(Wscratch0, Wscratch0, Wc);
    store_back();
}
// Produces cpsr_nzcv masked down to bit 29; the bit is left in place, not shifted to bit 0.
template<>
void EmitIR<IR::Opcode::A32GetCFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t nzcv_offset = offsetof(A32JitState, cpsr_nzcv);

    auto Wcarry = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wcarry);

    code.LDR(Wcarry, Xstate, nzcv_offset);
    code.AND(Wcarry, Wcarry, 1 << 29);
}
// ORs argument 0, shifted left by 27, into cpsr_q.
template<>
void EmitIR<IR::Opcode::A32OrQFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t q_offset = offsetof(A32JitState, cpsr_q);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wq = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wq);

    code.LDR(Wscratch0, Xstate, q_offset);
    code.ORR(Wscratch0, Wscratch0, Wq, LSL, 27);
    code.STR(Wscratch0, Xstate, q_offset);
}
// Loads cpsr_ge into a 32-bit S register.
template<>
void EmitIR<IR::Opcode::A32GetGEFlags>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t ge_offset = offsetof(A32JitState, cpsr_ge);

    auto Sge = ctx.reg_alloc.WriteS(inst);
    RegAlloc::Realize(Sge);

    code.LDR(Sge, Xstate, ge_offset);
}
// Stores argument 0 (held in a 32-bit S register) into cpsr_ge.
template<>
void EmitIR<IR::Opcode::A32SetGEFlags>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t ge_offset = offsetof(A32JitState, cpsr_ge);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Sge = ctx.reg_alloc.ReadS(args[0]);
    RegAlloc::Realize(Sge);

    code.STR(Sge, Xstate, ge_offset);
}
// Expands a compressed GE value (one bit per lane, at bits 19:16 of argument 0) into
// the per-byte form stored in cpsr_ge, where each set GE bit becomes 0xFF in its byte.
template<>
void EmitIR<IR::Opcode::A32SetGEFlagsCompressed>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wge = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wge);

    // Take exactly bits 19:16. A plain right shift would let any higher bits of the
    // argument flow into the multiply below and corrupt the expanded mask; A32SetCpsr
    // uses the same UBFX for this expansion. For arguments that only have bits 19:16
    // set, the result is unchanged.
    code.UBFX(Wscratch0, Wge, 16, 4);

    // Spread the four bits to bit 0 of each byte ...
    code.MOV(Wscratch1, 0x00204081);
    code.MUL(Wscratch0, Wscratch0, Wscratch1);
    code.AND(Wscratch0, Wscratch0, 0x01010101);

    // ... then turn every 0x01 byte into 0xFF via (x << 8) - x.
    code.LSL(Wscratch1, Wscratch0, 8);
    code.SUB(Wscratch0, Wscratch1, Wscratch0);

    code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_ge));
}
// Interworking branch: writes the new PC to regs[15] and the matching
// upper_location_descriptor in one paired/64-bit store. Bit 0 of the target selects the
// T bit (bit 0 of the upper word) and the PC alignment mask: bit 0 set clears PC bit 0,
// bit 0 clear clears PC bits 1:0.
template<>
void EmitIR<IR::Opcode::A32BXWritePC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const u32 upper_without_t = (A32::LocationDescriptor{ctx.block.EndLocation()}.SetSingleStepping(false).UniqueHash() >> 32) & 0xFFFFFFFE;

    // regs[15] is immediately followed by upper_location_descriptor.
    static_assert(offsetof(A32JitState, regs) + 16 * sizeof(u32) == offsetof(A32JitState, upper_location_descriptor));

    if (!args[0].IsImmediate()) {
        auto Wpc = ctx.reg_alloc.ReadW(args[0]);
        RegAlloc::Realize(Wpc);
        ctx.reg_alloc.SpillFlags();

        // Flags: NE when bit 0 of the target is set.
        code.ANDS(Wscratch0, Wpc, 1);
        // Wscratch1 <- bits to clear from the PC: 1 when NE, 3 otherwise.
        code.MOV(Wscratch1, 3);
        code.CSEL(Wscratch1, Wscratch0, Wscratch1, NE);
        code.BIC(Wscratch1, Wpc, Wscratch1);
        // Wscratch0 <- upper word, incremented (T bit set) when NE.
        code.MOV(Wscratch0, upper_without_t);
        code.CINC(Wscratch0, Wscratch0, NE);
        code.STP(Wscratch1, Wscratch0, Xstate, offsetof(A32JitState, regs) + 15 * sizeof(u32));
        return;
    }

    // Constant target: fold everything into a single 64-bit immediate.
    const u32 new_pc = args[0].GetImmediateU32();
    const bool target_bit0 = mcl::bit::get_bit<0>(new_pc);
    const u32 mask = target_bit0 ? 0xFFFFFFFE : 0xFFFFFFFC;
    const u32 new_upper = upper_without_t | (target_bit0 ? 1 : 0);

    code.MOV(Xscratch0, (u64{new_upper} << 32) | (new_pc & mask));
    code.STUR(Xscratch0, Xstate, offsetof(A32JitState, regs) + 15 * sizeof(u32));
}
// Updates upper_location_descriptor for the block's end location, unless the block
// contains an A32BXWritePC instruction (which writes that field itself).
template<>
void EmitIR<IR::Opcode::A32UpdateUpperLocationDescriptor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
    bool block_has_bx_write_pc = false;
    for (auto& block_inst : ctx.block) {
        if (block_inst.GetOpcode() == IR::Opcode::A32BXWritePC) {
            block_has_bx_write_pc = true;
            break;
        }
    }

    if (!block_has_bx_write_pc) {
        EmitSetUpperLocationDescriptor(code, ctx, ctx.block.EndLocation(), ctx.block.Location());
    }
}
// Emits a CallSVC relocation site with the immediate SVC number in W1. When cycle
// counting is enabled the site is bracketed by AddTicks before and GetTicksRemaining
// after, and the returned tick count is written to cycles_to_run and Xticks.
template<>
void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall();

    const bool count_cycles = ctx.conf.enable_cycle_counting;

    if (count_cycles) {
        // X1 <- cycles_to_run - Xticks, set up ahead of the AddTicks site.
        code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
        code.SUB(X1, X1, Xticks);
        EmitRelocation(code, ctx, LinkTarget::AddTicks);
    }

    code.MOV(W1, args[0].GetImmediateU32());
    EmitRelocation(code, ctx, LinkTarget::CallSVC);

    if (count_cycles) {
        // X0 after the GetTicksRemaining site becomes the new budget.
        EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
        code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
        code.MOV(Xticks, X0);
    }
}
// Emits an ExceptionRaised relocation site with the two immediate arguments in W1 and
// W2. When cycle counting is enabled the site is bracketed by AddTicks before and
// GetTicksRemaining after, and the returned tick count is written to cycles_to_run and Xticks.
template<>
void EmitIR<IR::Opcode::A32ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall();

    const bool count_cycles = ctx.conf.enable_cycle_counting;

    if (count_cycles) {
        // X1 <- cycles_to_run - Xticks, set up ahead of the AddTicks site.
        code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
        code.SUB(X1, X1, Xticks);
        EmitRelocation(code, ctx, LinkTarget::AddTicks);
    }

    code.MOV(W1, args[0].GetImmediateU32());
    code.MOV(W2, args[1].GetImmediateU32());
    EmitRelocation(code, ctx, LinkTarget::ExceptionRaised);

    if (count_cycles) {
        // X0 after the GetTicksRemaining site becomes the new budget.
        EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
        code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
        code.MOV(Xticks, X0);
    }
}
// Emits a host DSB with the SY barrier option.
template<>
void EmitIR<IR::Opcode::A32DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
code.DSB(oaknut::BarrierOp::SY);
}
// Emits a host DMB with the SY barrier option.
template<>
void EmitIR<IR::Opcode::A32DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
code.DMB(oaknut::BarrierOp::SY);
}
// Emits an InstructionSynchronizationBarrierRaised relocation site, but only when the
// hook_isb option is enabled; otherwise no code is produced.
template<>
void EmitIR<IR::Opcode::A32InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
    if (ctx.conf.hook_isb) {
        ctx.reg_alloc.PrepareForCall();
        EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
    }
}
// Assembles FPSCR as
//   (upper_location_descriptor & 0xffff0000) | fpsr | fpsr_nzcv
// after spilling the FPSR manager so the stored fpsr is current.
template<>
void EmitIR<IR::Opcode::A32GetFpscr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto Wfpscr = ctx.reg_alloc.WriteW(inst);
RegAlloc::Realize(Wfpscr);
ctx.fpsr.Spill();
// fpsr and fpsr_nzcv are adjacent, so one LDP reads both.
static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv));
code.LDR(Wfpscr, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.LDP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr));
// Only the high 16 bits of upper_location_descriptor contribute.
code.AND(Wfpscr, Wfpscr, 0xffff'0000);
code.ORR(Wscratch0, Wscratch0, Wscratch1);
code.ORR(Wfpscr, Wfpscr, Wscratch0);
}
// Splits an FPSCR value (argument 0) across three stored fields:
//   upper_location_descriptor = (old & 0x0000ffff) | (fpscr & 0x07f70000)
//   fpsr                      = fpscr & 0x0800009f
//   fpsr_nzcv                 = fpscr & 0xf0000000
template<>
void EmitIR<IR::Opcode::A32SetFpscr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Wfpscr = ctx.reg_alloc.ReadW(args[0]);
RegAlloc::Realize(Wfpscr);
// Notify the FPSR manager before the stored fpsr is replaced below.
ctx.fpsr.Overwrite();
// fpsr and fpsr_nzcv are adjacent, so one STP writes both.
static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv));
// High half of upper_location_descriptor: replaced by the masked FPSCR bits; low half kept.
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
code.MOV(Wscratch1, 0x07f7'0000);
code.AND(Wscratch1, Wfpscr, Wscratch1);
code.AND(Wscratch0, Wscratch0, 0x0000'ffff);
code.ORR(Wscratch0, Wscratch0, Wscratch1);
code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
// fpsr and fpsr_nzcv.
code.MOV(Wscratch0, 0x0800'009f);
code.AND(Wscratch0, Wfpscr, Wscratch0);
code.AND(Wscratch1, Wfpscr, 0xf000'0000);
code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr));
}
// Loads fpsr_nzcv unchanged.
template<>
void EmitIR<IR::Opcode::A32GetFpscrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t fp_nzcv_offset = offsetof(A32JitState, fpsr_nzcv);

    auto Wflags = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wflags);

    code.LDR(Wflags, Xstate, fp_nzcv_offset);
}
// Stores argument 0 unchanged into fpsr_nzcv.
template<>
void EmitIR<IR::Opcode::A32SetFpscrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const size_t fp_nzcv_offset = offsetof(A32JitState, fpsr_nzcv);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wflags = ctx.reg_alloc.ReadW(args[0]);
    RegAlloc::Realize(Wflags);

    code.STR(Wflags, Xstate, fp_nzcv_offset);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,299 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/interface/A32/coprocessor.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Placeholder for raising a guest coprocessor exception: no code is emitted; the call
// trips an assertion while the block is being compiled.
static void EmitCoprocessorException() {
ASSERT_FALSE("Should raise coproc exception here");
}
// Emits a call to a coprocessor callback.
//   callback   - function to call; its user_arg, when present, is loaded into X0.
//   inst       - when non-null, the instruction whose result is defined as X0 after the call.
//   arg0, arg1 - optional arguments handed to the register allocator's PrepareForCall.
static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
    const u64 function_address = reinterpret_cast<u64>(callback.function);

    ctx.reg_alloc.PrepareForCall({}, arg0, arg1);

    if (callback.user_arg) {
        code.MOV(X0, reinterpret_cast<u64>(*callback.user_arg));
    }

    code.MOV(Xscratch0, function_address);
    code.BLR(Xscratch0);

    if (inst != nullptr) {
        ctx.reg_alloc.DefineAsRegister(inst, X0);
    }
}
// Coprocessor internal operation: decodes the coprocessor info from argument 0, asks
// the configured coprocessor to compile the operation, and emits a call to the callback
// it returns. A missing coprocessor or a refused operation goes to EmitCoprocessorException.
template<>
void EmitIR<IR::Opcode::A32CoprocInternalOperation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
    const size_t coproc_num = coproc_info[0];
    const bool two = coproc_info[1] != 0;
    const auto opc1 = static_cast<unsigned>(coproc_info[2]);
    const auto CRd = static_cast<A32::CoprocReg>(coproc_info[3]);
    const auto CRn = static_cast<A32::CoprocReg>(coproc_info[4]);
    const auto CRm = static_cast<A32::CoprocReg>(coproc_info[5]);
    const auto opc2 = static_cast<unsigned>(coproc_info[6]);

    std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
    if (coproc) {
        if (const auto action = coproc->CompileInternalOperation(two, opc1, CRd, CRn, CRm, opc2)) {
            CallCoprocCallback(code, ctx, *action);
            return;
        }
    }

    // Either no coprocessor is installed in this slot or it declined the operation.
    EmitCoprocessorException();
}
// A32CoprocSendOneWord: sends argument 1 to a coprocessor. Depending on what
// the coprocessor's CompileSendOneWord returns, this either invokes the
// exception placeholder (monostate), emits a callback call with the word as
// its argument, or emits a direct 32-bit store to the returned u32 pointer.
template<>
void EmitIR<IR::Opcode::A32CoprocSendOneWord>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // Unpack the operands carried in argument 0.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t slot = info[0];
    const bool is_two = info[1] != 0;
    const auto op1 = static_cast<unsigned>(info[2]);
    const auto reg_n = static_cast<A32::CoprocReg>(info[3]);
    const auto reg_m = static_cast<A32::CoprocReg>(info[4]);
    const auto op2 = static_cast<unsigned>(info[5]);

    std::shared_ptr<A32::Coprocessor> handler = ctx.conf.coprocessors[slot];
    if (!handler) {
        EmitCoprocessorException();
        return;
    }

    const auto result = handler->CompileSendOneWord(is_two, op1, reg_n, reg_m, op2);

    if (std::holds_alternative<std::monostate>(result)) {
        EmitCoprocessorException();
        return;
    }

    if (const auto callback = std::get_if<A32::Coprocessor::Callback>(&result)) {
        CallCoprocCallback(code, ctx, *callback, nullptr, operands[1]);
        return;
    }

    if (const auto target = std::get_if<u32*>(&result)) {
        auto Wword = ctx.reg_alloc.ReadW(operands[1]);
        RegAlloc::Realize(Wword);

        // Store the word through the absolute host address of the target.
        code.MOV(Xscratch0, reinterpret_cast<u64>(*target));
        code.STR(Wword, Xscratch0);
        return;
    }

    UNREACHABLE();
}
// A32CoprocSendTwoWords: sends arguments 1 and 2 to a coprocessor. Depending
// on what CompileSendTwoWords returns, this either invokes the exception
// placeholder (monostate), emits a callback call with both words as
// arguments, or emits two direct 32-bit stores to the returned pointer pair.
template<>
void EmitIR<IR::Opcode::A32CoprocSendTwoWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // Unpack the operands carried in argument 0.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t slot = info[0];
    const bool is_two = info[1] != 0;
    const auto op = static_cast<unsigned>(info[2]);
    const auto reg_m = static_cast<A32::CoprocReg>(info[3]);

    std::shared_ptr<A32::Coprocessor> handler = ctx.conf.coprocessors[slot];
    if (!handler) {
        EmitCoprocessorException();
        return;
    }

    const auto result = handler->CompileSendTwoWords(is_two, op, reg_m);

    if (std::holds_alternative<std::monostate>(result)) {
        EmitCoprocessorException();
        return;
    }

    if (const auto callback = std::get_if<A32::Coprocessor::Callback>(&result)) {
        CallCoprocCallback(code, ctx, *callback, nullptr, operands[1], operands[2]);
        return;
    }

    if (const auto targets = std::get_if<std::array<u32*, 2>>(&result)) {
        auto Wfirst = ctx.reg_alloc.ReadW(operands[1]);
        auto Wsecond = ctx.reg_alloc.ReadW(operands[2]);
        RegAlloc::Realize(Wfirst, Wsecond);

        // Materialise both destination addresses, then store one word to each.
        code.MOV(Xscratch0, reinterpret_cast<u64>((*targets)[0]));
        code.MOV(Xscratch1, reinterpret_cast<u64>((*targets)[1]));
        code.STR(Wfirst, Xscratch0);
        code.STR(Wsecond, Xscratch1);
        return;
    }

    UNREACHABLE();
}
// A32CoprocGetOneWord: reads one word from a coprocessor. Depending on what
// CompileGetOneWord returns, this either invokes the exception placeholder
// (monostate), emits a callback call whose X0 result defines this
// instruction, or emits a direct 32-bit load from the returned u32 pointer.
template<>
void EmitIR<IR::Opcode::A32CoprocGetOneWord>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // Unpack the operands carried in argument 0.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t slot = info[0];
    const bool is_two = info[1] != 0;
    const auto op1 = static_cast<unsigned>(info[2]);
    const auto reg_n = static_cast<A32::CoprocReg>(info[3]);
    const auto reg_m = static_cast<A32::CoprocReg>(info[4]);
    const auto op2 = static_cast<unsigned>(info[5]);

    std::shared_ptr<A32::Coprocessor> handler = ctx.conf.coprocessors[slot];
    if (!handler) {
        EmitCoprocessorException();
        return;
    }

    const auto result = handler->CompileGetOneWord(is_two, op1, reg_n, reg_m, op2);

    if (std::holds_alternative<std::monostate>(result)) {
        EmitCoprocessorException();
        return;
    }

    if (const auto callback = std::get_if<A32::Coprocessor::Callback>(&result)) {
        CallCoprocCallback(code, ctx, *callback, inst);
        return;
    }

    if (const auto origin = std::get_if<u32*>(&result)) {
        auto Wword = ctx.reg_alloc.WriteW(inst);
        RegAlloc::Realize(Wword);

        // Load the word through the absolute host address of the source.
        code.MOV(Xscratch0, reinterpret_cast<u64>(*origin));
        code.LDR(Wword, Xscratch0);
        return;
    }

    UNREACHABLE();
}
// A32CoprocGetTwoWords: reads two words from a coprocessor and returns them
// packed into one 64-bit result (first word in bits 0-31, second word in
// bits 32-63). Depending on what CompileGetTwoWords returns, this either
// invokes the exception placeholder (monostate), emits a callback call whose
// X0 result defines this instruction, or emits direct loads from the returned
// pointer pair.
template<>
void EmitIR<IR::Opcode::A32CoprocGetTwoWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // Unpack the operands carried in argument 0.
    const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
    const size_t coproc_num = coproc_info[0];
    const bool two = coproc_info[1] != 0;
    const unsigned opc = coproc_info[2];
    const auto CRm = static_cast<A32::CoprocReg>(coproc_info[3]);

    std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
    if (!coproc) {
        EmitCoprocessorException();
        return;
    }

    auto action = coproc->CompileGetTwoWords(two, opc, CRm);

    if (std::holds_alternative<std::monostate>(action)) {
        EmitCoprocessorException();
        return;
    }

    if (const auto cb = std::get_if<A32::Coprocessor::Callback>(&action)) {
        CallCoprocCallback(code, ctx, *cb, inst);
        return;
    }

    if (const auto source_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
        auto Xvalue = ctx.reg_alloc.WriteX(inst);
        RegAlloc::Realize(Xvalue);

        code.MOV(Xscratch0, reinterpret_cast<u64>((*source_ptrs)[0]));
        code.MOV(Xscratch1, reinterpret_cast<u64>((*source_ptrs)[1]));
        // Each source is a u32, so load exactly 32 bits. A 64-bit load here
        // would read 4 bytes past the first object. Writing the W view
        // zero-extends into the X register, leaving bits 32-63 clear for BFI.
        code.LDR(Xvalue->toW(), Xscratch0);
        code.LDR(Wscratch1, Xscratch1);
        // Insert the second word into the upper half of the result.
        code.BFI(Xvalue, Xscratch1, 32, 32);
        return;
    }

    UNREACHABLE();
}
// A32CoprocLoadWords: asks the configured coprocessor to compile a load-words
// operation and emits a call to the returned callback, passing argument 1.
// If the coprocessor slot is empty, or no callback is returned, the
// coprocessor-exception placeholder is invoked instead.
template<>
void EmitIR<IR::Opcode::A32CoprocLoadWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // Unpack the operands carried in argument 0.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t slot = info[0];
    const bool is_two = info[1] != 0;
    const bool is_long = info[2] != 0;
    const auto reg_d = static_cast<A32::CoprocReg>(info[3]);
    const bool option_present = info[4] != 0;

    // The option byte is only meaningful when its presence flag is set.
    std::optional<u8> option = std::nullopt;
    if (option_present) {
        option = info[5];
    }

    std::shared_ptr<A32::Coprocessor> handler = ctx.conf.coprocessors[slot];
    if (!handler) {
        EmitCoprocessorException();
        return;
    }

    const auto callback = handler->CompileLoadWords(is_two, is_long, reg_d, option);
    if (!callback) {
        EmitCoprocessorException();
        return;
    }

    CallCoprocCallback(code, ctx, *callback, nullptr, operands[1]);
}
// A32CoprocStoreWords: asks the configured coprocessor to compile a
// store-words operation and emits a call to the returned callback, passing
// argument 1. If the coprocessor slot is empty, or no callback is returned,
// the coprocessor-exception placeholder is invoked instead.
template<>
void EmitIR<IR::Opcode::A32CoprocStoreWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // Unpack the operands carried in argument 0.
    const auto info = inst->GetArg(0).GetCoprocInfo();
    const size_t slot = info[0];
    const bool is_two = info[1] != 0;
    const bool is_long = info[2] != 0;
    const auto reg_d = static_cast<A32::CoprocReg>(info[3]);
    const bool option_present = info[4] != 0;

    // The option byte is only meaningful when its presence flag is set.
    std::optional<u8> option = std::nullopt;
    if (option_present) {
        option = info[5];
    }

    std::shared_ptr<A32::Coprocessor> handler = ctx.conf.coprocessors[slot];
    if (!handler) {
        EmitCoprocessorException();
        return;
    }

    const auto callback = handler->CompileStoreWords(is_two, is_long, reg_d, option);
    if (!callback) {
        EmitCoprocessorException();
        return;
    }

    CallCoprocCallback(code, ctx, *callback, nullptr, operands[1]);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,107 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_arm64_memory.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// A32ClearExclusive: writes zero (WZR) to the exclusive_state field of
// A32JitState.
template<>
void EmitIR<IR::Opcode::A32ClearExclusive>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
    code.STR(WZR, Xstate, offsetof(A32JitState, exclusive_state));
}
// A32ReadMemory8: forwards to the shared EmitReadMemory helper instantiated
// with 8 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<8>(code, ctx, inst);
}
// A32ReadMemory16: forwards to the shared EmitReadMemory helper instantiated
// with 16 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<16>(code, ctx, inst);
}
// A32ReadMemory32: forwards to the shared EmitReadMemory helper instantiated
// with 32 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<32>(code, ctx, inst);
}
// A32ReadMemory64: forwards to the shared EmitReadMemory helper instantiated
// with 64 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<64>(code, ctx, inst);
}
// A32ExclusiveReadMemory8: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 8 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ExclusiveReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<8>(code, ctx, inst);
}
// A32ExclusiveReadMemory16: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 16 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ExclusiveReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<16>(code, ctx, inst);
}
// A32ExclusiveReadMemory32: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 32 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ExclusiveReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<32>(code, ctx, inst);
}
// A32ExclusiveReadMemory64: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 64 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ExclusiveReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<64>(code, ctx, inst);
}
// A32WriteMemory8: forwards to the shared EmitWriteMemory helper instantiated
// with 8 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32WriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<8>(code, ctx, inst);
}
// A32WriteMemory16: forwards to the shared EmitWriteMemory helper instantiated
// with 16 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32WriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<16>(code, ctx, inst);
}
// A32WriteMemory32: forwards to the shared EmitWriteMemory helper instantiated
// with 32 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32WriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<32>(code, ctx, inst);
}
// A32WriteMemory64: forwards to the shared EmitWriteMemory helper instantiated
// with 64 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32WriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<64>(code, ctx, inst);
}
// A32ExclusiveWriteMemory8: forwards to the shared EmitExclusiveWriteMemory
// helper instantiated with 8 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<8>(code, ctx, inst);
}
// A32ExclusiveWriteMemory16: forwards to the shared EmitExclusiveWriteMemory
// helper instantiated with 16 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<16>(code, ctx, inst);
}
// A32ExclusiveWriteMemory32: forwards to the shared EmitExclusiveWriteMemory
// helper instantiated with 32 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<32>(code, ctx, inst);
}
// A32ExclusiveWriteMemory64: forwards to the shared EmitExclusiveWriteMemory
// helper instantiated with 64 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<64>(code, ctx, inst);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,519 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/bit_cast.hpp>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/interface/halt_reason.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Emits a conditional branch on `cond` evaluated against the guest flags.
// The cpsr_nzcv word of A64JitState is loaded and moved into the host NZCV
// register, then a B.<cond> to a fresh label is emitted. The label is
// returned unbound; the caller is responsible for placing it.
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) {
    oaknut::Label taken;
    // TODO: Flags in host flags
    code.LDR(Wscratch0, Xstate, offsetof(A64JitState, cpsr_nzcv));
    code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
    code.B(static_cast<oaknut::Cond>(cond), taken);
    return taken;
}
// Forward declaration of the IR::Term::Terminal overload, so the per-terminal
// overloads below can call it on nested terminals before it is defined.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
// Interpret terminal: not supported by this backend. Reaching this overload
// trips an assertion; no code is emitted.
void EmitA64Terminal(oaknut::CodeGenerator&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) {
    ASSERT_FALSE("Interpret should never be emitted.");
}
// ReturnToDispatch terminal: emits a relocation targeting the
// return-to-dispatcher link target.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// LinkBlock terminal. When block linking is enabled and we are not
// single-stepping, emits a guarded block-link branch to terminal.next:
//  - with cycle counting: the link is skipped when Xticks <= 0;
//  - otherwise: the link is skipped when the halt word is non-zero.
// The fallback path (always emitted) stores the next PC into A64JitState and
// returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
    oaknut::Label fallback;

    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        if (ctx.conf.enable_cycle_counting) {
            code.CMP(Xticks, 0);
            code.B(LE, fallback);
        } else {
            code.LDAR(Wscratch0, Xhalt);
            code.CBNZ(Wscratch0, fallback);
        }
        // Both guards are followed by the same block-link branch.
        EmitBlockLinkRelocation(code, ctx, terminal.next, BlockRelocationType::Branch);
    }

    code.l(fallback);
    code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC());
    code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// LinkBlockFast terminal. When block linking is enabled and we are not
// single-stepping, emits an unguarded block-link branch to terminal.next.
// The code that follows (always emitted) stores the next PC into A64JitState
// and returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        EmitBlockLinkRelocation(code, ctx, terminal.next, BlockRelocationType::Branch);
    }

    code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC());
    code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// PopRSBHint terminal. When the return-stack-buffer optimization is enabled
// and we are not single-stepping, emits a lookup in the RSB held in
// StackLayout: a key is built from the masked fpcr and pc of A64JitState,
// compared against the entry at the current rsb_ptr, and on a match control
// branches to the code pointer stored with that entry. On a mismatch (or when
// the optimization is off) control returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
    const bool use_rsb = ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step;
    if (use_rsb) {
        oaknut::Label miss;

        // Build the lookup key in X0: (fpcr & fpcr_mask) << fpcr_shift | (pc & pc_mask).
        code.MOV(Wscratch0, A64::LocationDescriptor::fpcr_mask);
        code.LDR(W0, Xstate, offsetof(A64JitState, fpcr));
        code.LDR(X1, Xstate, offsetof(A64JitState, pc));
        code.AND(W0, W0, Wscratch0);
        code.AND(X1, X1, A64::LocationDescriptor::pc_mask);
        code.LSL(X0, X0, A64::LocationDescriptor::fpcr_shift);
        code.ORR(X0, X0, X1);

        // Locate the current entry (X2 = SP + masked rsb_ptr), then step
        // rsb_ptr back by one entry and write it back.
        code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
        code.AND(Wscratch2, Wscratch2, RSBIndexMask);
        code.ADD(X2, SP, Xscratch2);
        code.SUB(Wscratch2, Wscratch2, sizeof(RSBEntry));
        code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));

        // Load the entry's two 64-bit words and jump through the second one
        // when the first matches the key.
        code.LDP(Xscratch0, Xscratch1, X2, offsetof(StackLayout, rsb));
        code.CMP(X0, Xscratch0);
        code.B(NE, miss);
        code.BR(Xscratch1);

        code.l(miss);
    }

    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// FastDispatchHint terminal: currently just returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);

    // TODO: Implement FastDispatchHint optimization
}
// If terminal: emits a conditional branch on terminal.if_, followed by the
// else_ terminal as the fall-through path; the branch target is then bound
// and the then_ terminal is emitted there.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label cond_holds = EmitA64Cond(code, ctx, terminal.if_);

    // Condition false: fall through.
    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    // Condition true: branch target.
    code.l(cond_holds);
    EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step);
}
// CheckBit terminal: loads the check_bit byte from StackLayout; a non-zero
// value falls through into the then_ terminal, zero branches to the else_
// terminal.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label bit_clear;

    code.LDRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
    code.CBZ(Wscratch0, bit_clear);

    EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step);

    code.l(bit_clear);
    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
}
// CheckHalt terminal: load-acquires the halt word; when it is zero the else_
// terminal runs, otherwise control returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label halt_requested;

    code.LDAR(Wscratch0, Xhalt);
    code.CBNZ(Wscratch0, halt_requested);

    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    code.l(halt_requested);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
// Variant dispatcher: visits `terminal` and forwards the concrete alternative
// to the matching EmitA64Terminal overload with the same location and
// single-step flag.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    const auto dispatch = [&](const auto& concrete) {
        EmitA64Terminal(code, ctx, concrete, initial_location, is_single_step);
    };
    boost::apply_visitor(dispatch, terminal);
}
// Entry point: emits the terminal of the block held in ctx. The block
// location is passed on with its single-stepping flag cleared; the original
// flag value is passed separately as is_single_step.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A64::LocationDescriptor block_location{ctx.block.Location()};
    EmitA64Terminal(code,
                    ctx,
                    ctx.block.GetTerminal(),
                    block_location.SetSingleStepping(false),
                    block_location.SingleStepping());
}
// Emits the terminal taken when the block's condition fails: a LinkBlock to
// the block's ConditionFailedLocation. The block location is passed on with
// its single-stepping flag cleared; the original flag value is passed
// separately as is_single_step.
void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A64::LocationDescriptor block_location{ctx.block.Location()};
    EmitA64Terminal(code,
                    ctx,
                    IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()},
                    block_location.SetSingleStepping(false),
                    block_location.SingleStepping());
}
// Emits a check for a pending memory abort after a memory access.
//
// Does nothing unless ctx.conf.check_halt_on_memory_access is set. Otherwise
// the halt word is load-acquired and tested for HaltReason::MemoryAbort:
//  - bit clear: branch to `end` (supplied and bound by the caller);
//  - bit set:   store the PC taken from the location descriptor in
//               argument 0 into A64JitState and return from run-code.
void EmitA64CheckMemoryAbort(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end) {
    if (!ctx.conf.check_halt_on_memory_access) {
        return;
    }

    const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

    // The halt word is accessed as a 32-bit value everywhere else in this
    // file (LDAR Wscratch0, Xhalt). A 64-bit LDAR here would read past it and
    // require 8-byte alignment, so use the 32-bit form. The tested mask fits
    // in 32 bits.
    code.LDAR(Wscratch0, Xhalt);
    code.TST(Wscratch0, static_cast<u32>(HaltReason::MemoryAbort));
    code.B(EQ, end);

    code.MOV(Xscratch0, current_location.PC());
    code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
    EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
}
// A64SetCheckBit: stores argument 0 into the check_bit byte of StackLayout.
// Immediate arguments are folded: true stores the constant 1 via a scratch
// register, false stores WZR directly.
template<>
void EmitIR<IR::Opcode::A64SetCheckBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    if (!operands[0].IsImmediate()) {
        auto Wbit = ctx.reg_alloc.ReadW(operands[0]);
        RegAlloc::Realize(Wbit);
        code.STRB(Wbit, SP, offsetof(StackLayout, check_bit));
        return;
    }

    if (operands[0].GetImmediateU1()) {
        code.MOV(Wscratch0, 1);
        code.STRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
    } else {
        code.STRB(WZR, SP, offsetof(StackLayout, check_bit));
    }
}
// A64GetCFlag: loads cpsr_nzcv from A64JitState and masks it down to bit 29
// (the C flag position in NZCV). The result is left in place, not shifted.
template<>
void EmitIR<IR::Opcode::A64GetCFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Wcarry = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wcarry);

    code.LDR(Wcarry, Xstate, offsetof(A64JitState, cpsr_nzcv));
    code.AND(Wcarry, Wcarry, 1 << 29);
}
// A64GetNZCVRaw: loads the cpsr_nzcv word of A64JitState unmodified into the
// result register.
template<>
void EmitIR<IR::Opcode::A64GetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Wresult = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wresult);

    code.LDR(Wresult, Xstate, offsetof(A64JitState, cpsr_nzcv));
}
// A64SetNZCVRaw: stores argument 0 (read as a 32-bit register) unmodified
// into the cpsr_nzcv word of A64JitState.
template<>
void EmitIR<IR::Opcode::A64SetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wvalue = ctx.reg_alloc.ReadW(operands[0]);
    RegAlloc::Realize(Wvalue);

    code.STR(Wvalue, Xstate, offsetof(A64JitState, cpsr_nzcv));
}
// A64SetNZCV: stores argument 0 (read as a 32-bit register) into the
// cpsr_nzcv word of A64JitState. The emitted code is the same as for
// A64SetNZCVRaw.
template<>
void EmitIR<IR::Opcode::A64SetNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wvalue = ctx.reg_alloc.ReadW(operands[0]);
    RegAlloc::Realize(Wvalue);

    code.STR(Wvalue, Xstate, offsetof(A64JitState, cpsr_nzcv));
}
// A64GetW: loads the low 32 bits of a guest register slot. Slots are 8 bytes
// apart in the `reg` array of A64JitState, indexed by the register reference
// in argument 0.
template<>
void EmitIR<IR::Opcode::A64GetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A64::Reg guest_reg = inst->GetArg(0).GetA64RegRef();

    auto Wout = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wout);

    // TODO: Detect if Gpr vs Fpr is more appropriate

    code.LDR(Wout, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(guest_reg));
}
// A64GetX: loads a full 64-bit guest register slot. Slots are 8 bytes apart
// in the `reg` array of A64JitState, indexed by the register reference in
// argument 0.
template<>
void EmitIR<IR::Opcode::A64GetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A64::Reg guest_reg = inst->GetArg(0).GetA64RegRef();

    auto Xout = ctx.reg_alloc.WriteX(inst);
    RegAlloc::Realize(Xout);

    // TODO: Detect if Gpr vs Fpr is more appropriate

    code.LDR(Xout, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(guest_reg));
}
// A64GetS: loads the S-sized (32-bit) view of a guest vector slot. Slots are
// 16 bytes apart in the `vec` array of A64JitState, indexed by the vector
// reference in argument 0.
template<>
void EmitIR<IR::Opcode::A64GetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A64::Vec guest_vec = inst->GetArg(0).GetA64VecRef();

    auto Sout = ctx.reg_alloc.WriteS(inst);
    RegAlloc::Realize(Sout);

    code.LDR(Sout, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(guest_vec));
}
// A64GetD: loads the D-sized (64-bit) view of a guest vector slot. Slots are
// 16 bytes apart in the `vec` array of A64JitState, indexed by the vector
// reference in argument 0.
template<>
void EmitIR<IR::Opcode::A64GetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A64::Vec guest_vec = inst->GetArg(0).GetA64VecRef();

    auto Dout = ctx.reg_alloc.WriteD(inst);
    RegAlloc::Realize(Dout);

    code.LDR(Dout, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(guest_vec));
}
// A64GetQ: loads a full Q-sized (128-bit) guest vector slot. Slots are 16
// bytes apart in the `vec` array of A64JitState, indexed by the vector
// reference in argument 0.
template<>
void EmitIR<IR::Opcode::A64GetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A64::Vec guest_vec = inst->GetArg(0).GetA64VecRef();

    auto Qout = ctx.reg_alloc.WriteQ(inst);
    RegAlloc::Realize(Qout);

    code.LDR(Qout, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(guest_vec));
}
// A64GetSP: loads the 64-bit `sp` field of A64JitState into the result
// register.
template<>
void EmitIR<IR::Opcode::A64GetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Xout = ctx.reg_alloc.WriteX(inst);
    RegAlloc::Realize(Xout);

    code.LDR(Xout, Xstate, offsetof(A64JitState, sp));
}
// A64GetFPCR: loads the 32-bit `fpcr` field of A64JitState into the result
// register.
template<>
void EmitIR<IR::Opcode::A64GetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Wout = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wout);

    code.LDR(Wout, Xstate, offsetof(A64JitState, fpcr));
}
// A64GetFPSR: obtains the FPSR value through the context's FPSR manager
// (ctx.fpsr.GetFpsr) rather than emitting through `code` directly.
template<>
void EmitIR<IR::Opcode::A64GetFPSR>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
    auto Wout = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wout);

    ctx.fpsr.GetFpsr(Wout);
}
// A64SetW: writes a 32-bit value to a guest register slot as a full 64-bit
// store. Slots are 8 bytes apart in the `reg` array of A64JitState, indexed
// by the register reference in argument 0.
template<>
void EmitIR<IR::Opcode::A64SetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A64::Reg guest_reg = inst->GetArg(0).GetA64RegRef();

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Win = ctx.reg_alloc.ReadW(operands[1]);
    RegAlloc::Realize(Win);

    // TODO: Detect if Gpr vs Fpr is more appropriate

    // A 32-bit register write zeroes the upper half of the X register, so the
    // 64-bit store below writes the zero-extended value.
    code.MOV(*Win, Win);
    code.STR(Win->toX(), Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(guest_reg));
}
// A64SetX: stores a 64-bit value to a guest register slot. Slots are 8 bytes
// apart in the `reg` array of A64JitState, indexed by the register reference
// in argument 0.
template<>
void EmitIR<IR::Opcode::A64SetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const A64::Reg guest_reg = inst->GetArg(0).GetA64RegRef();

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Xin = ctx.reg_alloc.ReadX(operands[1]);
    RegAlloc::Realize(Xin);

    // TODO: Detect if Gpr vs Fpr is more appropriate

    code.STR(Xin, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(guest_reg));
}
// A64SetS: writes an S-sized value to a guest vector slot as a full 128-bit
// store. Slots are 16 bytes apart in the `vec` array of A64JitState, indexed
// by the vector reference in argument 0.
template<>
void EmitIR<IR::Opcode::A64SetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Vec guest_vec = inst->GetArg(0).GetA64VecRef();

    auto Sin = ctx.reg_alloc.ReadS(operands[1]);
    RegAlloc::Realize(Sin);

    // A scalar FMOV onto itself clears the bits above the S lane, so the
    // 128-bit store below writes the zero-extended value.
    code.FMOV(Sin, Sin);
    code.STR(Sin->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(guest_vec));
}
// A64SetD: writes a D-sized value to a guest vector slot as a full 128-bit
// store. Slots are 16 bytes apart in the `vec` array of A64JitState, indexed
// by the vector reference in argument 0.
template<>
void EmitIR<IR::Opcode::A64SetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Vec guest_vec = inst->GetArg(0).GetA64VecRef();

    auto Din = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Din);

    // A scalar FMOV onto itself clears the bits above the D lane, so the
    // 128-bit store below writes the zero-extended value.
    code.FMOV(Din, Din);
    code.STR(Din->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(guest_vec));
}
// A64SetQ: stores a full 128-bit value to a guest vector slot. Slots are 16
// bytes apart in the `vec` array of A64JitState, indexed by the vector
// reference in argument 0.
template<>
void EmitIR<IR::Opcode::A64SetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Vec guest_vec = inst->GetArg(0).GetA64VecRef();

    auto Qin = ctx.reg_alloc.ReadQ(operands[1]);
    RegAlloc::Realize(Qin);

    code.STR(Qin, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(guest_vec));
}
// A64SetSP: stores argument 0 (64-bit) into the `sp` field of A64JitState.
template<>
void EmitIR<IR::Opcode::A64SetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Xin = ctx.reg_alloc.ReadX(operands[0]);
    RegAlloc::Realize(Xin);

    code.STR(Xin, Xstate, offsetof(A64JitState, sp));
}
// A64SetFPCR: stores argument 0 into the `fpcr` field of A64JitState and also
// writes it to the host FPCR system register.
template<>
void EmitIR<IR::Opcode::A64SetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Win = ctx.reg_alloc.ReadW(operands[0]);
    RegAlloc::Realize(Win);

    code.STR(Win, Xstate, offsetof(A64JitState, fpcr));
    code.MSR(oaknut::SystemReg::FPCR, Win->toX());
}
// A64SetFPSR: stores argument 0 into the `fpsr` field of A64JitState and also
// writes it to the host FPSR system register.
template<>
void EmitIR<IR::Opcode::A64SetFPSR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Win = ctx.reg_alloc.ReadW(operands[0]);
    RegAlloc::Realize(Win);

    code.STR(Win, Xstate, offsetof(A64JitState, fpsr));
    code.MSR(oaknut::SystemReg::FPSR, Win->toX());
}
// A64SetPC: stores argument 0 (64-bit) into the `pc` field of A64JitState.
template<>
void EmitIR<IR::Opcode::A64SetPC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Xin = ctx.reg_alloc.ReadX(operands[0]);
    RegAlloc::Realize(Xin);

    code.STR(Xin, Xstate, offsetof(A64JitState, pc));
}
// A64CallSupervisor: emits a call to the CallSVC link target with the
// immediate from argument 0 in W1. With cycle counting enabled, the call is
// bracketed by tick bookkeeping: before it, cycles_to_run minus Xticks is
// passed to AddTicks; after it, GetTicksRemaining refreshes both
// cycles_to_run and Xticks from X0.
template<>
void EmitIR<IR::Opcode::A64CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall();

    const bool count_cycles = ctx.conf.enable_cycle_counting;

    if (count_cycles) {
        code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
        code.SUB(X1, X1, Xticks);
        EmitRelocation(code, ctx, LinkTarget::AddTicks);
    }

    code.MOV(W1, operands[0].GetImmediateU32());
    EmitRelocation(code, ctx, LinkTarget::CallSVC);

    if (count_cycles) {
        EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
        code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
        code.MOV(Xticks, X0);
    }
}
// A64ExceptionRaised: emits a call to the ExceptionRaised link target with
// the 64-bit immediates from arguments 0 and 1 in X1 and X2. With cycle
// counting enabled, the call is bracketed by tick bookkeeping: before it,
// cycles_to_run minus Xticks is passed to AddTicks; after it,
// GetTicksRemaining refreshes both cycles_to_run and Xticks from X0.
template<>
void EmitIR<IR::Opcode::A64ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall();

    const bool count_cycles = ctx.conf.enable_cycle_counting;

    if (count_cycles) {
        code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
        code.SUB(X1, X1, Xticks);
        EmitRelocation(code, ctx, LinkTarget::AddTicks);
    }

    code.MOV(X1, operands[0].GetImmediateU64());
    code.MOV(X2, operands[1].GetImmediateU64());
    EmitRelocation(code, ctx, LinkTarget::ExceptionRaised);

    if (count_cycles) {
        EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
        code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
        code.MOV(Xticks, X0);
    }
}
// A64DataCacheOperationRaised: prepares a call with IR arguments 1 and 2
// (argument 0 is not forwarded) and emits a relocation to the
// DataCacheOperationRaised link target.
template<>
void EmitIR<IR::Opcode::A64DataCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, operands[1], operands[2]);

    EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised);
}
// A64InstructionCacheOperationRaised: prepares a call with IR arguments 0 and
// 1 and emits a relocation to the InstructionCacheOperationRaised link
// target.
template<>
void EmitIR<IR::Opcode::A64InstructionCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, operands[0], operands[1]);

    EmitRelocation(code, ctx, LinkTarget::InstructionCacheOperationRaised);
}
// A64DataSynchronizationBarrier: emits a host DSB with the SY option.
template<>
void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
    code.DSB(oaknut::BarrierOp::SY);
}
// A64DataMemoryBarrier: emits a host DMB with the SY option.
template<>
void EmitIR<IR::Opcode::A64DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
    code.DMB(oaknut::BarrierOp::SY);
}
// A64InstructionSynchronizationBarrier: emits nothing unless
// ctx.conf.hook_isb is set; when it is, prepares a call and emits a
// relocation to the InstructionSynchronizationBarrierRaised link target.
template<>
void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
    if (ctx.conf.hook_isb) {
        ctx.reg_alloc.PrepareForCall();
        EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
    }
}
// A64GetCNTFRQ: materialises the configured cntfreq_el0 value as an immediate
// in the result register.
template<>
void EmitIR<IR::Opcode::A64GetCNTFRQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Xout = ctx.reg_alloc.WriteX(inst);
    RegAlloc::Realize(Xout);

    code.MOV(Xout, ctx.conf.cntfreq_el0);
}
// A64GetCNTPCT: emits a call to the GetCNTPCT link target and defines X0 as
// this instruction's result. When cycle counting is enabled and the wall
// clock is not used for CNTPCT, the tick state is synchronised first:
// cycles_to_run minus Xticks is passed to AddTicks, then GetTicksRemaining
// refreshes both cycles_to_run and Xticks from X0.
template<>
void EmitIR<IR::Opcode::A64GetCNTPCT>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.PrepareForCall();

    const bool sync_ticks = !ctx.conf.wall_clock_cntpct && ctx.conf.enable_cycle_counting;
    if (sync_ticks) {
        code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
        code.SUB(X1, X1, Xticks);
        EmitRelocation(code, ctx, LinkTarget::AddTicks);
        EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
        code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
        code.MOV(Xticks, X0);
    }

    EmitRelocation(code, ctx, LinkTarget::GetCNTPCT);
    ctx.reg_alloc.DefineAsRegister(inst, X0);
}
// A64GetCTR: materialises the configured ctr_el0 value as an immediate in the
// result register.
template<>
void EmitIR<IR::Opcode::A64GetCTR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Wout = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wout);

    code.MOV(Wout, ctx.conf.ctr_el0);
}
// A64GetDCZID: materialises the configured dczid_el0 value as an immediate in
// the result register.
template<>
void EmitIR<IR::Opcode::A64GetDCZID>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Wout = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Wout);

    code.MOV(Wout, ctx.conf.dczid_el0);
}
// A64GetTPIDR: loads a 64-bit value through the address held in
// ctx.conf.tpidr_el0 (bit-cast to an integer and materialised in a scratch
// register).
template<>
void EmitIR<IR::Opcode::A64GetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Xout = ctx.reg_alloc.WriteX(inst);
    RegAlloc::Realize(Xout);

    code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
    code.LDR(Xout, Xscratch0);
}
// A64GetTPIDRRO: loads a 64-bit value through the address held in
// ctx.conf.tpidrro_el0 (bit-cast to an integer and materialised in a scratch
// register).
template<>
void EmitIR<IR::Opcode::A64GetTPIDRRO>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto Xout = ctx.reg_alloc.WriteX(inst);
    RegAlloc::Realize(Xout);

    code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidrro_el0));
    code.LDR(Xout, Xscratch0);
}
// A64SetTPIDR: stores argument 0 (64-bit) through the address held in
// ctx.conf.tpidr_el0 (bit-cast to an integer and materialised in a scratch
// register).
template<>
void EmitIR<IR::Opcode::A64SetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Xin = ctx.reg_alloc.ReadX(operands[0]);
    RegAlloc::Realize(Xin);

    code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
    code.STR(Xin, Xscratch0);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,128 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_arm64_memory.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/acc_type.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// A64ClearExclusive: writes zero (WZR) to the exclusive_state field of
// A64JitState.
template<>
void EmitIR<IR::Opcode::A64ClearExclusive>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
    code.STR(WZR, Xstate, offsetof(A64JitState, exclusive_state));
}
// A64ReadMemory8: forwards to the shared EmitReadMemory helper instantiated
// with 8 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<8>(code, ctx, inst);
}
// A64ReadMemory16: forwards to the shared EmitReadMemory helper instantiated
// with 16 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<16>(code, ctx, inst);
}
// A64ReadMemory32: forwards to the shared EmitReadMemory helper instantiated
// with 32 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<32>(code, ctx, inst);
}
// A64ReadMemory64: forwards to the shared EmitReadMemory helper instantiated
// with 64 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<64>(code, ctx, inst);
}
// A64ReadMemory128: forwards to the shared EmitReadMemory helper instantiated
// with 128 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitReadMemory<128>(code, ctx, inst);
}
// A64ExclusiveReadMemory8: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 8 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<8>(code, ctx, inst);
}
// A64ExclusiveReadMemory16: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 16 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<16>(code, ctx, inst);
}
// A64ExclusiveReadMemory32: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 32 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<32>(code, ctx, inst);
}
// A64ExclusiveReadMemory64: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 64 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<64>(code, ctx, inst);
}
// A64ExclusiveReadMemory128: forwards to the shared EmitExclusiveReadMemory
// helper instantiated with 128 (presumably the access width in bits).
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveReadMemory<128>(code, ctx, inst);
}
// A64 memory writes. Each opcode forwards to the shared EmitWriteMemory<bitsize>
// helper with the matching access width.

// 8-bit write.
template<>
void EmitIR<IR::Opcode::A64WriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<8>(code, ctx, inst);
}

// 16-bit write.
template<>
void EmitIR<IR::Opcode::A64WriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<16>(code, ctx, inst);
}

// 32-bit write.
template<>
void EmitIR<IR::Opcode::A64WriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<32>(code, ctx, inst);
}

// 64-bit write.
template<>
void EmitIR<IR::Opcode::A64WriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<64>(code, ctx, inst);
}

// 128-bit write.
template<>
void EmitIR<IR::Opcode::A64WriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitWriteMemory<128>(code, ctx, inst);
}
// A64 exclusive memory writes. Each opcode forwards to the shared
// EmitExclusiveWriteMemory<bitsize> helper with the matching access width.

// 8-bit exclusive write.
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<8>(code, ctx, inst);
}

// 16-bit exclusive write.
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<16>(code, ctx, inst);
}

// 32-bit exclusive write.
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<32>(code, ctx, inst);
}

// 64-bit exclusive write.
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<64>(code, ctx, inst);
}

// 128-bit exclusive write.
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitExclusiveWriteMemory<128>(code, ctx, inst);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,166 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Shared scaffolding for the CRC32 opcodes.
//
// Allocates a 32-bit result register, reads the incoming CRC (args[0]) as a
// 32-bit register and the data operand (args[1]) as a `bitsize`-wide register,
// then hands all three to `emit_fn`, which issues the actual instruction.
template<size_t bitsize, typename EmitFn>
static void EmitCRC(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit_fn) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Wcrc_out = ctx.reg_alloc.WriteW(inst);
    auto Wcrc_in = ctx.reg_alloc.ReadW(operands[0]);
    auto Rvalue = ctx.reg_alloc.ReadReg<bitsize>(operands[1]);
    RegAlloc::Realize(Wcrc_out, Wcrc_in, Rvalue);

    emit_fn(Wcrc_out, Wcrc_in, Rvalue);
}
// CRC32Castagnoli* opcodes: each one selects the matching CRC32C{B,H,W,X}
// instruction and lets EmitCRC handle register allocation.

// Emits CRC32CB; data operand is read as a 32-bit register.
template<>
void EmitIR<IR::Opcode::CRC32Castagnoli8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_crc32cb = [&code](auto& Wcrc_out, auto& Wcrc_in, auto& Wvalue) {
        code.CRC32CB(Wcrc_out, Wcrc_in, Wvalue);
    };
    EmitCRC<32>(code, ctx, inst, emit_crc32cb);
}

// Emits CRC32CH; data operand is read as a 32-bit register.
template<>
void EmitIR<IR::Opcode::CRC32Castagnoli16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_crc32ch = [&code](auto& Wcrc_out, auto& Wcrc_in, auto& Wvalue) {
        code.CRC32CH(Wcrc_out, Wcrc_in, Wvalue);
    };
    EmitCRC<32>(code, ctx, inst, emit_crc32ch);
}

// Emits CRC32CW; data operand is read as a 32-bit register.
template<>
void EmitIR<IR::Opcode::CRC32Castagnoli32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_crc32cw = [&code](auto& Wcrc_out, auto& Wcrc_in, auto& Wvalue) {
        code.CRC32CW(Wcrc_out, Wcrc_in, Wvalue);
    };
    EmitCRC<32>(code, ctx, inst, emit_crc32cw);
}

// Emits CRC32CX; data operand is read as a 64-bit register.
template<>
void EmitIR<IR::Opcode::CRC32Castagnoli64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_crc32cx = [&code](auto& Wcrc_out, auto& Wcrc_in, auto& Xvalue) {
        code.CRC32CX(Wcrc_out, Wcrc_in, Xvalue);
    };
    EmitCRC<64>(code, ctx, inst, emit_crc32cx);
}
// CRC32ISO* opcodes: each one selects the matching CRC32{B,H,W,X} instruction
// and lets EmitCRC handle register allocation.

// Emits CRC32B; data operand is read as a 32-bit register.
template<>
void EmitIR<IR::Opcode::CRC32ISO8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_crc32b = [&code](auto& Wcrc_out, auto& Wcrc_in, auto& Wvalue) {
        code.CRC32B(Wcrc_out, Wcrc_in, Wvalue);
    };
    EmitCRC<32>(code, ctx, inst, emit_crc32b);
}

// Emits CRC32H; data operand is read as a 32-bit register.
template<>
void EmitIR<IR::Opcode::CRC32ISO16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_crc32h = [&code](auto& Wcrc_out, auto& Wcrc_in, auto& Wvalue) {
        code.CRC32H(Wcrc_out, Wcrc_in, Wvalue);
    };
    EmitCRC<32>(code, ctx, inst, emit_crc32h);
}

// Emits CRC32W; data operand is read as a 32-bit register.
template<>
void EmitIR<IR::Opcode::CRC32ISO32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_crc32w = [&code](auto& Wcrc_out, auto& Wcrc_in, auto& Wvalue) {
        code.CRC32W(Wcrc_out, Wcrc_in, Wvalue);
    };
    EmitCRC<32>(code, ctx, inst, emit_crc32w);
}

// Emits CRC32X; data operand is read as a 64-bit register.
template<>
void EmitIR<IR::Opcode::CRC32ISO64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_crc32x = [&code](auto& Wcrc_out, auto& Wcrc_in, auto& Xvalue) {
        code.CRC32X(Wcrc_out, Wcrc_in, Xvalue);
    };
    EmitCRC<64>(code, ctx, inst, emit_crc32x);
}
// AESDecryptSingleRound: loads a zero immediate into the destination (via its
// D view) and then issues AESD with the input vector as the second operand.
template<>
void EmitIR<IR::Opcode::AESDecryptSingleRound>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qresult = ctx.reg_alloc.WriteQ(inst);
    auto Qsource = ctx.reg_alloc.ReadQ(operands[0]);
    RegAlloc::Realize(Qresult, Qsource);

    // The destination is zeroed first; AESD then combines it with the source.
    code.MOVI(Qresult->toD(), oaknut::RepImm{0});
    code.AESD(Qresult->B16(), Qsource->B16());
}

// AESEncryptSingleRound: same shape as the decrypt variant, but issues AESE.
template<>
void EmitIR<IR::Opcode::AESEncryptSingleRound>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qresult = ctx.reg_alloc.WriteQ(inst);
    auto Qsource = ctx.reg_alloc.ReadQ(operands[0]);
    RegAlloc::Realize(Qresult, Qsource);

    // The destination is zeroed first; AESE then combines it with the source.
    code.MOVI(Qresult->toD(), oaknut::RepImm{0});
    code.AESE(Qresult->B16(), Qsource->B16());
}
// AESInverseMixColumns: a single AESIMC from the input vector to the result.
template<>
void EmitIR<IR::Opcode::AESInverseMixColumns>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qresult = ctx.reg_alloc.WriteQ(inst);
    auto Qsource = ctx.reg_alloc.ReadQ(operands[0]);
    RegAlloc::Realize(Qresult, Qsource);

    code.AESIMC(Qresult->B16(), Qsource->B16());
}

// AESMixColumns: a single AESMC from the input vector to the result.
template<>
void EmitIR<IR::Opcode::AESMixColumns>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qresult = ctx.reg_alloc.WriteQ(inst);
    auto Qsource = ctx.reg_alloc.ReadQ(operands[0]);
    RegAlloc::Realize(Qresult, Qsource);

    code.AESMC(Qresult->B16(), Qsource->B16());
}
// SM4AccessSubstitutionBox is not implemented by this backend: the arguments
// are deliberately unused and the emitter asserts if it is ever reached.
template<>
void EmitIR<IR::Opcode::SM4AccessSubstitutionBox>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// SHA256Hash: args[3] is an immediate flag selecting which instruction to emit.
//   flag set   -> SHA256H,  result overwrites the register holding args[0]
//   flag clear -> SHA256H2, result overwrites the register holding args[1]
template<>
void EmitIR<IR::Opcode::SHA256Hash>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    const bool is_part1 = operands[3].GetImmediateU1();

    if (!is_part1) {
        auto Qx = ctx.reg_alloc.ReadQ(operands[0]);
        auto Qy = ctx.reg_alloc.ReadWriteQ(operands[1], inst);
        auto Qz = ctx.reg_alloc.ReadQ(operands[2]);
        RegAlloc::Realize(Qx, Qy, Qz);

        // Operand order is intentional: y is the destination here, x the source.
        code.SHA256H2(Qy, Qx, Qz->S4());
        return;
    }

    auto Qx = ctx.reg_alloc.ReadWriteQ(operands[0], inst);
    auto Qy = ctx.reg_alloc.ReadQ(operands[1]);
    auto Qz = ctx.reg_alloc.ReadQ(operands[2]);
    RegAlloc::Realize(Qx, Qy, Qz);

    code.SHA256H(Qx, Qy, Qz->S4());
}
// SHA256MessageSchedule0: emits SHA256SU0. args[0] is read and overwritten
// with the result; args[1] is a read-only source.
template<>
void EmitIR<IR::Opcode::SHA256MessageSchedule0>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qaccum = ctx.reg_alloc.ReadWriteQ(operands[0], inst);
    auto Qsrc = ctx.reg_alloc.ReadQ(operands[1]);
    RegAlloc::Realize(Qaccum, Qsrc);

    code.SHA256SU0(Qaccum->S4(), Qsrc->S4());
}

// SHA256MessageSchedule1: emits SHA256SU1. args[0] is read and overwritten
// with the result; args[1] and args[2] are read-only sources.
template<>
void EmitIR<IR::Opcode::SHA256MessageSchedule1>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qaccum = ctx.reg_alloc.ReadWriteQ(operands[0], inst);
    auto Qsrc1 = ctx.reg_alloc.ReadQ(operands[1]);
    auto Qsrc2 = ctx.reg_alloc.ReadQ(operands[2]);
    RegAlloc::Realize(Qaccum, Qsrc1, Qsrc2);

    code.SHA256SU1(Qaccum->S4(), Qsrc1->S4(), Qsrc2->S4());
}
} // namespace Dynarmic::Backend::Arm64

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,801 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Scaffolding for unary scalar FP operations.
//
// Allocates a `bitsize`-wide vector register for the result and one for
// args[0], calls ctx.fpsr.Load(), then invokes `emit(result, operand)` to
// issue the instruction.
template<size_t bitsize, typename EmitFn>
static void EmitTwoOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdst = ctx.reg_alloc.WriteVec<bitsize>(inst);
    auto Vsrc = ctx.reg_alloc.ReadVec<bitsize>(operands[0]);
    RegAlloc::Realize(Vdst, Vsrc);

    ctx.fpsr.Load();
    emit(Vdst, Vsrc);
}
// Scaffolding for binary scalar FP operations.
//
// Allocates `bitsize`-wide vector registers for the result, args[0] and
// args[1], calls ctx.fpsr.Load(), then invokes `emit(result, a, b)`.
template<size_t bitsize, typename EmitFn>
static void EmitThreeOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdst = ctx.reg_alloc.WriteVec<bitsize>(inst);
    auto Vlhs = ctx.reg_alloc.ReadVec<bitsize>(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadVec<bitsize>(operands[1]);
    RegAlloc::Realize(Vdst, Vlhs, Vrhs);

    ctx.fpsr.Load();
    emit(Vdst, Vlhs, Vrhs);
}
// Scaffolding for ternary scalar FP operations (e.g. fused multiply-add).
//
// Allocates `bitsize`-wide vector registers for the result and args[0..2],
// calls ctx.fpsr.Load(), then invokes `emit(result, a, b, c)` with the
// operands in IR argument order.
template<size_t bitsize, typename EmitFn>
static void EmitFourOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdst = ctx.reg_alloc.WriteVec<bitsize>(inst);
    auto Vfirst = ctx.reg_alloc.ReadVec<bitsize>(operands[0]);
    auto Vsecond = ctx.reg_alloc.ReadVec<bitsize>(operands[1]);
    auto Vthird = ctx.reg_alloc.ReadVec<bitsize>(operands[2]);
    RegAlloc::Realize(Vdst, Vfirst, Vsecond, Vthird);

    ctx.fpsr.Load();
    emit(Vdst, Vfirst, Vsecond, Vthird);
}
// Scaffolding for FP precision conversions.
//
// args[0] is the source value (bitsize_from wide); args[1] is an immediate
// rounding mode. The conversion is emitted under the context's current FPCR,
// so the requested rounding mode is asserted to equal ctx.FPCR().RMode().
template<size_t bitsize_from, size_t bitsize_to, typename EmitFn>
static void EmitConvert(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdst = ctx.reg_alloc.WriteVec<bitsize_to>(inst);
    auto Vsrc = ctx.reg_alloc.ReadVec<bitsize_from>(operands[0]);
    const auto rounding_mode = static_cast<FP::RoundingMode>(operands[1].GetImmediateU8());
    RegAlloc::Realize(Vdst, Vsrc);

    ctx.fpsr.Load();
    ASSERT(rounding_mode == ctx.FPCR().RMode());

    emit(Vdst, Vsrc);
}
// Emits a floating-point -> fixed-point/integer conversion into a general
// purpose register.
//
// IR arguments:
//   args[0] - source FP value (bitsize_from wide)
//   args[1] - immediate: number of fraction bits
//   args[2] - immediate: FP::RoundingMode
//
// The destination register is at least 32 bits wide, so 16-bit results are
// produced in a W register. Fraction bits and 16-bit destinations are only
// handled for TowardsZero; other rounding modes assert fbits == 0 and
// bitsize_to != 16.
template<size_t bitsize_from, size_t bitsize_to, bool is_signed>
static void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Rto = ctx.reg_alloc.WriteReg<std::max<size_t>(bitsize_to, 32)>(inst);
    auto Vfrom = ctx.reg_alloc.ReadVec<bitsize_from>(operands[0]);
    const size_t fbits = operands[1].GetImmediateU8();
    const auto rounding_mode = static_cast<FP::RoundingMode>(operands[2].GetImmediateU8());
    RegAlloc::Realize(Rto, Vfrom);
    ctx.fpsr.Load();

    if (rounding_mode == FP::RoundingMode::TowardsZero) {
        if constexpr (is_signed) {
            if constexpr (bitsize_to == 16) {
                // Convert with 16 extra fraction bits, then shift them back out.
                code.FCVTZS(Rto, Vfrom, fbits + 16);
                // Wscratch0 = all-ones when the result is negative, zero otherwise.
                code.ASR(Wscratch0, Rto, 31);
                // Adds 0xFFFF for negative values so that the shift below
                // rounds towards zero when truncating.
                code.ADD(Rto, Rto, Wscratch0, LSR, 16);
                code.LSR(Rto, Rto, 16);
            } else if (fbits) {
                code.FCVTZS(Rto, Vfrom, fbits);
            } else {
                code.FCVTZS(Rto, Vfrom);
            }
        } else {
            if constexpr (bitsize_to == 16) {
                // Convert with 16 extra fraction bits, then shift them back out.
                code.FCVTZU(Rto, Vfrom, fbits + 16);
                code.LSR(Rto, Rto, 16);
            } else if (fbits) {
                code.FCVTZU(Rto, Vfrom, fbits);
            } else {
                code.FCVTZU(Rto, Vfrom);
            }
        }
        return;
    }

    // Every other rounding mode: plain integer conversion only.
    ASSERT(fbits == 0);
    ASSERT(bitsize_to != 16);

    if constexpr (is_signed) {
        switch (rounding_mode) {
        case FP::RoundingMode::ToNearest_TieEven:
            code.FCVTNS(Rto, Vfrom);
            break;
        case FP::RoundingMode::TowardsPlusInfinity:
            code.FCVTPS(Rto, Vfrom);
            break;
        case FP::RoundingMode::TowardsMinusInfinity:
            code.FCVTMS(Rto, Vfrom);
            break;
        case FP::RoundingMode::TowardsZero:
            // Not reachable from here (handled above); kept for completeness.
            code.FCVTZS(Rto, Vfrom);
            break;
        case FP::RoundingMode::ToNearest_TieAwayFromZero:
            code.FCVTAS(Rto, Vfrom);
            break;
        case FP::RoundingMode::ToOdd:
            ASSERT_FALSE("Unimplemented");
            break;
        default:
            ASSERT_FALSE("Invalid RoundingMode");
            break;
        }
    } else {
        switch (rounding_mode) {
        case FP::RoundingMode::ToNearest_TieEven:
            code.FCVTNU(Rto, Vfrom);
            break;
        case FP::RoundingMode::TowardsPlusInfinity:
            code.FCVTPU(Rto, Vfrom);
            break;
        case FP::RoundingMode::TowardsMinusInfinity:
            code.FCVTMU(Rto, Vfrom);
            break;
        case FP::RoundingMode::TowardsZero:
            // Not reachable from here (handled above); kept for completeness.
            code.FCVTZU(Rto, Vfrom);
            break;
        case FP::RoundingMode::ToNearest_TieAwayFromZero:
            code.FCVTAU(Rto, Vfrom);
            break;
        case FP::RoundingMode::ToOdd:
            ASSERT_FALSE("Unimplemented");
            break;
        default:
            ASSERT_FALSE("Invalid RoundingMode");
            break;
        }
    }
}
// Emits a fixed-point/integer -> floating-point conversion.
//
// IR arguments:
//   args[0] - source integer (read into a register of at least 32 bits)
//   args[1] - immediate: number of fraction bits
//   args[2] - immediate: FP::RoundingMode
//
// `emit(Vto, Rfrom, fbits)` issues the conversion instruction. When the
// requested rounding mode differs from the context's FPCR rounding mode, the
// FPCR is rewritten around the conversion and restored afterwards; both
// writes go through Wscratch0/Xscratch0.
template<size_t bitsize_from, size_t bitsize_to, typename EmitFn>
static void EmitFromFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Vto = ctx.reg_alloc.WriteVec<bitsize_to>(inst);
    auto Rfrom = ctx.reg_alloc.ReadReg<std::max<size_t>(bitsize_from, 32)>(operands[0]);
    const size_t fbits = operands[1].GetImmediateU8();
    const auto rounding_mode = static_cast<FP::RoundingMode>(operands[2].GetImmediateU8());
    RegAlloc::Realize(Vto, Rfrom);
    ctx.fpsr.Load();

    // Fast path: the current FPCR already has the requested rounding mode.
    if (rounding_mode == ctx.FPCR().RMode()) {
        emit(Vto, Rfrom, fbits);
        return;
    }

    // Temporarily install an FPCR carrying the requested rounding mode.
    FP::FPCR temporary_fpcr = ctx.FPCR();
    temporary_fpcr.RMode(rounding_mode);
    code.MOV(Wscratch0, temporary_fpcr.Value());
    code.MSR(oaknut::SystemReg::FPCR, Xscratch0);

    emit(Vto, Rfrom, fbits);

    // Put the context's FPCR value back.
    code.MOV(Wscratch0, ctx.FPCR().Value());
    code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
}
// FPAbs16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPAbs16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPAbs32: FABS through the 32-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPAbs32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fabs = [&code](auto& Sdst, auto& Ssrc) {
        code.FABS(Sdst, Ssrc);
    };
    EmitTwoOp<32>(code, ctx, inst, emit_fabs);
}

// FPAbs64: FABS through the 64-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPAbs64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fabs = [&code](auto& Ddst, auto& Dsrc) {
        code.FABS(Ddst, Dsrc);
    };
    EmitTwoOp<64>(code, ctx, inst, emit_fabs);
}
// FPAdd32: FADD through the 32-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fadd = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FADD(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fadd);
}

// FPAdd64: FADD through the 64-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fadd = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FADD(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fadd);
}
// Emits a scalar floating-point compare that writes the host flags.
//
// IR arguments:
//   args[0] - first operand (`size` bits wide)
//   args[1] - second operand; when it is the immediate 0 the compare-with-zero
//             instruction form is used and no register is allocated for it
//   args[2] - immediate flag: when set, FCMPE is emitted instead of FCMP
//
// Declared `static` like the other helper templates in this file: it is only
// used by the FPCompare32/FPCompare64 specializations below, and internal
// linkage keeps it from clashing with a same-named template in another
// translation unit.
template<size_t size>
static void EmitCompare(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto flags = ctx.reg_alloc.WriteFlags(inst);
    auto Va = ctx.reg_alloc.ReadVec<size>(args[0]);
    const bool exc_on_qnan = args[2].GetImmediateU1();

    if (args[1].IsImmediate() && args[1].GetImmediateU64() == 0) {
        // Compare against literal zero.
        RegAlloc::Realize(flags, Va);
        ctx.fpsr.Load();
        if (exc_on_qnan) {
            code.FCMPE(Va, 0);
        } else {
            code.FCMP(Va, 0);
        }
    } else {
        // General register-register compare.
        auto Vb = ctx.reg_alloc.ReadVec<size>(args[1]);
        RegAlloc::Realize(flags, Va, Vb);
        ctx.fpsr.Load();
        if (exc_on_qnan) {
            code.FCMPE(Va, Vb);
        } else {
            code.FCMP(Va, Vb);
        }
    }
}
// FPCompare32: forwards to the shared compare emitter at 32 bits.
template<>
void EmitIR<IR::Opcode::FPCompare32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitCompare<32>(code, ctx, inst);
}

// FPCompare64: forwards to the shared compare emitter at 64 bits.
template<>
void EmitIR<IR::Opcode::FPCompare64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitCompare<64>(code, ctx, inst);
}
// FPDiv32: FDIV through the 32-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPDiv32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fdiv = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FDIV(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fdiv);
}

// FPDiv64: FDIV through the 64-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPDiv64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fdiv = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FDIV(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fdiv);
}
// Min/max family. FPMax/FPMin map to FMAX/FMIN; the *Numeric variants map to
// FMAXNM/FMINNM. All go through the binary helper at the opcode's width.

// FPMax32: FMAX, 32-bit.
template<>
void EmitIR<IR::Opcode::FPMax32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmax = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FMAX(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fmax);
}

// FPMax64: FMAX, 64-bit.
template<>
void EmitIR<IR::Opcode::FPMax64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmax = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FMAX(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fmax);
}

// FPMaxNumeric32: FMAXNM, 32-bit.
template<>
void EmitIR<IR::Opcode::FPMaxNumeric32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmaxnm = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FMAXNM(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fmaxnm);
}

// FPMaxNumeric64: FMAXNM, 64-bit.
template<>
void EmitIR<IR::Opcode::FPMaxNumeric64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmaxnm = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FMAXNM(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fmaxnm);
}

// FPMin32: FMIN, 32-bit.
template<>
void EmitIR<IR::Opcode::FPMin32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmin = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FMIN(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fmin);
}

// FPMin64: FMIN, 64-bit.
template<>
void EmitIR<IR::Opcode::FPMin64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmin = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FMIN(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fmin);
}

// FPMinNumeric32: FMINNM, 32-bit.
template<>
void EmitIR<IR::Opcode::FPMinNumeric32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fminnm = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FMINNM(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fminnm);
}

// FPMinNumeric64: FMINNM, 64-bit.
template<>
void EmitIR<IR::Opcode::FPMinNumeric64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fminnm = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FMINNM(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fminnm);
}
// FPMul32: FMUL through the 32-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPMul32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmul = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FMUL(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fmul);
}

// FPMul64: FMUL through the 64-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPMul64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmul = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FMUL(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fmul);
}
// FPMulAdd16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPMulAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPMulAdd32: FMADD. The IR's first argument is passed as the instruction's
// last operand; the second and third IR arguments are passed before it.
template<>
void EmitIR<IR::Opcode::FPMulAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmadd = [&code](auto& Sdst, auto& Saddend, auto& Sfactor1, auto& Sfactor2) {
        code.FMADD(Sdst, Sfactor1, Sfactor2, Saddend);
    };
    EmitFourOp<32>(code, ctx, inst, emit_fmadd);
}

// FPMulAdd64: 64-bit counterpart of FPMulAdd32, same operand reordering.
template<>
void EmitIR<IR::Opcode::FPMulAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmadd = [&code](auto& Ddst, auto& Daddend, auto& Dfactor1, auto& Dfactor2) {
        code.FMADD(Ddst, Dfactor1, Dfactor2, Daddend);
    };
    EmitFourOp<64>(code, ctx, inst, emit_fmadd);
}
// FPMulSub16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPMulSub16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPMulSub32: FMSUB. The IR's first argument is passed as the instruction's
// last operand; the second and third IR arguments are passed before it.
template<>
void EmitIR<IR::Opcode::FPMulSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmsub = [&code](auto& Sdst, auto& Sbase, auto& Sfactor1, auto& Sfactor2) {
        code.FMSUB(Sdst, Sfactor1, Sfactor2, Sbase);
    };
    EmitFourOp<32>(code, ctx, inst, emit_fmsub);
}

// FPMulSub64: 64-bit counterpart of FPMulSub32, same operand reordering.
template<>
void EmitIR<IR::Opcode::FPMulSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmsub = [&code](auto& Ddst, auto& Dbase, auto& Dfactor1, auto& Dfactor2) {
        code.FMSUB(Ddst, Dfactor1, Dfactor2, Dbase);
    };
    EmitFourOp<64>(code, ctx, inst, emit_fmsub);
}
// FPMulX32: FMULX through the 32-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPMulX32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmulx = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FMULX(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fmulx);
}

// FPMulX64: FMULX through the 64-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPMulX64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fmulx = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FMULX(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fmulx);
}
// FPNeg16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPNeg16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPNeg32: FNEG through the 32-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPNeg32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fneg = [&code](auto& Sdst, auto& Ssrc) {
        code.FNEG(Sdst, Ssrc);
    };
    EmitTwoOp<32>(code, ctx, inst, emit_fneg);
}

// FPNeg64: FNEG through the 64-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPNeg64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fneg = [&code](auto& Ddst, auto& Dsrc) {
        code.FNEG(Ddst, Dsrc);
    };
    EmitTwoOp<64>(code, ctx, inst, emit_fneg);
}
// FPRecipEstimate16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPRecipEstimate16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPRecipEstimate32: FRECPE through the 32-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPRecipEstimate32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frecpe = [&code](auto& Sdst, auto& Ssrc) {
        code.FRECPE(Sdst, Ssrc);
    };
    EmitTwoOp<32>(code, ctx, inst, emit_frecpe);
}

// FPRecipEstimate64: FRECPE through the 64-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPRecipEstimate64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frecpe = [&code](auto& Ddst, auto& Dsrc) {
        code.FRECPE(Ddst, Dsrc);
    };
    EmitTwoOp<64>(code, ctx, inst, emit_frecpe);
}
// FPRecipExponent16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPRecipExponent16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPRecipExponent32: FRECPX through the 32-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPRecipExponent32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frecpx = [&code](auto& Sdst, auto& Ssrc) {
        code.FRECPX(Sdst, Ssrc);
    };
    EmitTwoOp<32>(code, ctx, inst, emit_frecpx);
}

// FPRecipExponent64: FRECPX through the 64-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPRecipExponent64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frecpx = [&code](auto& Ddst, auto& Dsrc) {
        code.FRECPX(Ddst, Dsrc);
    };
    EmitTwoOp<64>(code, ctx, inst, emit_frecpx);
}
// FPRecipStepFused16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPRecipStepFused16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPRecipStepFused32: FRECPS through the 32-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPRecipStepFused32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frecps = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FRECPS(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_frecps);
}

// FPRecipStepFused64: FRECPS through the 64-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPRecipStepFused64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frecps = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FRECPS(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_frecps);
}
// FPRoundInt16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPRoundInt16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// FPRoundInt32: rounds a single-precision value to an integral value.
//
// Arg 1 is the rounding mode and arg 2 the `exact` flag. With `exact` set,
// FRINTX is emitted and the requested mode is asserted to match the context's
// FPCR rounding mode. Otherwise the FRINT* variant matching the requested
// mode is emitted directly.
template<>
void EmitIR<IR::Opcode::FPRoundInt32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
    const bool exact = inst->GetArg(2).GetU1();

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Sdst = ctx.reg_alloc.WriteS(inst);
    auto Ssrc = ctx.reg_alloc.ReadS(operands[0]);
    RegAlloc::Realize(Sdst, Ssrc);
    ctx.fpsr.Load();

    if (exact) {
        ASSERT(ctx.FPCR().RMode() == rounding_mode);
        code.FRINTX(Sdst, Ssrc);
        return;
    }

    switch (rounding_mode) {
    case FP::RoundingMode::ToNearest_TieEven:
        code.FRINTN(Sdst, Ssrc);
        break;
    case FP::RoundingMode::TowardsPlusInfinity:
        code.FRINTP(Sdst, Ssrc);
        break;
    case FP::RoundingMode::TowardsMinusInfinity:
        code.FRINTM(Sdst, Ssrc);
        break;
    case FP::RoundingMode::TowardsZero:
        code.FRINTZ(Sdst, Ssrc);
        break;
    case FP::RoundingMode::ToNearest_TieAwayFromZero:
        code.FRINTA(Sdst, Ssrc);
        break;
    default:
        ASSERT_FALSE("Invalid RoundingMode");
        break;
    }
}
// FPRoundInt64: rounds a double-precision value to an integral value.
//
// Arg 1 is the rounding mode and arg 2 the `exact` flag. With `exact` set,
// FRINTX is emitted and the requested mode is asserted to match the context's
// FPCR rounding mode. Otherwise the FRINT* variant matching the requested
// mode is emitted directly.
template<>
void EmitIR<IR::Opcode::FPRoundInt64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
    const bool exact = inst->GetArg(2).GetU1();

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Ddst = ctx.reg_alloc.WriteD(inst);
    auto Dsrc = ctx.reg_alloc.ReadD(operands[0]);
    RegAlloc::Realize(Ddst, Dsrc);
    ctx.fpsr.Load();

    if (exact) {
        ASSERT(ctx.FPCR().RMode() == rounding_mode);
        code.FRINTX(Ddst, Dsrc);
        return;
    }

    switch (rounding_mode) {
    case FP::RoundingMode::ToNearest_TieEven:
        code.FRINTN(Ddst, Dsrc);
        break;
    case FP::RoundingMode::TowardsPlusInfinity:
        code.FRINTP(Ddst, Dsrc);
        break;
    case FP::RoundingMode::TowardsMinusInfinity:
        code.FRINTM(Ddst, Dsrc);
        break;
    case FP::RoundingMode::TowardsZero:
        code.FRINTZ(Ddst, Dsrc);
        break;
    case FP::RoundingMode::ToNearest_TieAwayFromZero:
        code.FRINTA(Ddst, Dsrc);
        break;
    default:
        ASSERT_FALSE("Invalid RoundingMode");
        break;
    }
}
// FPRSqrtEstimate16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPRSqrtEstimate16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPRSqrtEstimate32: FRSQRTE through the 32-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPRSqrtEstimate32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frsqrte = [&code](auto& Sdst, auto& Ssrc) {
        code.FRSQRTE(Sdst, Ssrc);
    };
    EmitTwoOp<32>(code, ctx, inst, emit_frsqrte);
}

// FPRSqrtEstimate64: FRSQRTE through the 64-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPRSqrtEstimate64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frsqrte = [&code](auto& Ddst, auto& Dsrc) {
        code.FRSQRTE(Ddst, Dsrc);
    };
    EmitTwoOp<64>(code, ctx, inst, emit_frsqrte);
}
// FPRSqrtStepFused16 is not implemented by this backend; asserts if reached.
template<>
void EmitIR<IR::Opcode::FPRSqrtStepFused16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

// FPRSqrtStepFused32: FRSQRTS through the 32-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPRSqrtStepFused32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frsqrts = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FRSQRTS(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_frsqrts);
}

// FPRSqrtStepFused64: FRSQRTS through the 64-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPRSqrtStepFused64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_frsqrts = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FRSQRTS(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_frsqrts);
}
// FPSqrt32: FSQRT through the 32-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPSqrt32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fsqrt = [&code](auto& Sdst, auto& Ssrc) {
        code.FSQRT(Sdst, Ssrc);
    };
    EmitTwoOp<32>(code, ctx, inst, emit_fsqrt);
}

// FPSqrt64: FSQRT through the 64-bit unary helper.
template<>
void EmitIR<IR::Opcode::FPSqrt64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fsqrt = [&code](auto& Ddst, auto& Dsrc) {
        code.FSQRT(Ddst, Dsrc);
    };
    EmitTwoOp<64>(code, ctx, inst, emit_fsqrt);
}

// FPSub32: FSUB through the 32-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fsub = [&code](auto& Sdst, auto& Slhs, auto& Srhs) {
        code.FSUB(Sdst, Slhs, Srhs);
    };
    EmitThreeOp<32>(code, ctx, inst, emit_fsub);
}

// FPSub64: FSUB through the 64-bit binary helper.
template<>
void EmitIR<IR::Opcode::FPSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fsub = [&code](auto& Ddst, auto& Dlhs, auto& Drhs) {
        code.FSUB(Ddst, Dlhs, Drhs);
    };
    EmitThreeOp<64>(code, ctx, inst, emit_fsub);
}
// Precision conversions. Each opcode emits a single FCVT through
// EmitConvert<from_bits, to_bits>, which also checks the rounding mode.

// FPHalfToDouble: 16 -> 64 bits.
template<>
void EmitIR<IR::Opcode::FPHalfToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fcvt = [&code](auto& Ddst, auto& Hsrc) {
        code.FCVT(Ddst, Hsrc);
    };
    EmitConvert<16, 64>(code, ctx, inst, emit_fcvt);
}

// FPHalfToSingle: 16 -> 32 bits.
template<>
void EmitIR<IR::Opcode::FPHalfToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fcvt = [&code](auto& Sdst, auto& Hsrc) {
        code.FCVT(Sdst, Hsrc);
    };
    EmitConvert<16, 32>(code, ctx, inst, emit_fcvt);
}

// FPSingleToDouble: 32 -> 64 bits.
template<>
void EmitIR<IR::Opcode::FPSingleToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fcvt = [&code](auto& Ddst, auto& Ssrc) {
        code.FCVT(Ddst, Ssrc);
    };
    EmitConvert<32, 64>(code, ctx, inst, emit_fcvt);
}

// FPSingleToHalf: 32 -> 16 bits.
template<>
void EmitIR<IR::Opcode::FPSingleToHalf>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fcvt = [&code](auto& Hdst, auto& Ssrc) {
        code.FCVT(Hdst, Ssrc);
    };
    EmitConvert<32, 16>(code, ctx, inst, emit_fcvt);
}

// FPDoubleToHalf: 64 -> 16 bits.
template<>
void EmitIR<IR::Opcode::FPDoubleToHalf>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_fcvt = [&code](auto& Hdst, auto& Dsrc) {
        code.FCVT(Hdst, Dsrc);
    };
    EmitConvert<64, 16>(code, ctx, inst, emit_fcvt);
}
// FPDoubleToSingle: 64 -> 32 bit conversion.
//
// Every rounding mode except ToOdd goes through EmitConvert with FCVT.
// ToOdd is special-cased and emitted as FCVTXN, bypassing EmitConvert's
// rounding-mode assertion.
template<>
void EmitIR<IR::Opcode::FPDoubleToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());

    if (rounding_mode != FP::RoundingMode::ToOdd) {
        const auto emit_fcvt = [&code](auto& Sdst, auto& Dsrc) {
            code.FCVT(Sdst, Dsrc);
        };
        EmitConvert<64, 32>(code, ctx, inst, emit_fcvt);
        return;
    }

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Sdst = ctx.reg_alloc.WriteS(inst);
    auto Dsrc = ctx.reg_alloc.ReadD(operands[0]);
    RegAlloc::Realize(Sdst, Dsrc);
    ctx.fpsr.Load();

    code.FCVTXN(Sdst, Dsrc);
}
// Double -> fixed-point conversions. Each opcode instantiates
// EmitToFixed<64, result_bits, is_signed>.

// Signed 16-bit result.
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitToFixed<64, 16, true>(code, ctx, inst);
}

// Signed 32-bit result.
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedS32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitToFixed<64, 32, true>(code, ctx, inst);
}

// Signed 64-bit result.
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedS64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // TODO: Consider fpr source
    EmitToFixed<64, 64, true>(code, ctx, inst);
}

// Unsigned 16-bit result.
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitToFixed<64, 16, false>(code, ctx, inst);
}

// Unsigned 32-bit result.
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedU32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitToFixed<64, 32, false>(code, ctx, inst);
}

// Unsigned 64-bit result.
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedU64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // TODO: Consider fpr source
    EmitToFixed<64, 64, false>(code, ctx, inst);
}
// Half -> fixed-point conversions. None of these are implemented by this
// backend: each emitter ignores its arguments and asserts if reached.

template<>
void EmitIR<IR::Opcode::FPHalfToFixedS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::FPHalfToFixedS32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::FPHalfToFixedS64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::FPHalfToFixedU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::FPHalfToFixedU32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}

template<>
void EmitIR<IR::Opcode::FPHalfToFixedU64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// Single -> fixed-point conversions. Each opcode instantiates
// EmitToFixed<32, result_bits, is_signed>.

// Signed 16-bit result.
template<>
void EmitIR<IR::Opcode::FPSingleToFixedS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitToFixed<32, 16, true>(code, ctx, inst);
}

// Signed 32-bit result.
template<>
void EmitIR<IR::Opcode::FPSingleToFixedS32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // TODO: Consider fpr source
    EmitToFixed<32, 32, true>(code, ctx, inst);
}

// Signed 64-bit result.
template<>
void EmitIR<IR::Opcode::FPSingleToFixedS64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitToFixed<32, 64, true>(code, ctx, inst);
}

// Unsigned 16-bit result.
template<>
void EmitIR<IR::Opcode::FPSingleToFixedU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitToFixed<32, 16, false>(code, ctx, inst);
}

// Unsigned 32-bit result.
template<>
void EmitIR<IR::Opcode::FPSingleToFixedU32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // TODO: Consider fpr source
    EmitToFixed<32, 32, false>(code, ctx, inst);
}

// Unsigned 64-bit result.
template<>
void EmitIR<IR::Opcode::FPSingleToFixedU64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitToFixed<32, 64, false>(code, ctx, inst);
}
template<>
void EmitIR<IR::Opcode::FPFixedU16ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
EmitFromFixed<16, 32>(code, ctx, inst, [&](auto& Sto, auto& Wfrom, u8 fbits) {
code.LSL(Wscratch0, Wfrom, 16);
code.UCVTF(Sto, Wscratch0, fbits + 16);
});
}
template<>
void EmitIR<IR::Opcode::FPFixedS16ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // The source is shifted left by 16 into Wscratch0 (putting its bit 15 at bit 31) and the
    // fraction-bit count is raised by 16 to compensate, then the signed SCVTF form is emitted.
    auto convert = [&](auto& Sdest, auto& Wsrc, u8 fbits) {
        code.LSL(Wscratch0, Wsrc, 16);
        code.SCVTF(Sdest, Wscratch0, fbits + 16);
    };
    EmitFromFixed<16, 32>(code, ctx, inst, convert);
}
template<>
void EmitIR<IR::Opcode::FPFixedU16ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // The source is shifted left by 16 into Wscratch0 and the fraction-bit count is
    // raised by 16 to compensate, then the unsigned UCVTF form is emitted.
    auto convert = [&](auto& Ddest, auto& Wsrc, u8 fbits) {
        code.LSL(Wscratch0, Wsrc, 16);
        code.UCVTF(Ddest, Wscratch0, fbits + 16);
    };
    EmitFromFixed<16, 64>(code, ctx, inst, convert);
}
template<>
void EmitIR<IR::Opcode::FPFixedS16ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // The source is shifted left by 16 into Wscratch0 (putting its bit 15 at bit 31) and the
    // fraction-bit count is raised by 16 to compensate, then the signed SCVTF form is emitted.
    auto convert = [&](auto& Ddest, auto& Wsrc, u8 fbits) {
        code.LSL(Wscratch0, Wsrc, 16);
        code.SCVTF(Ddest, Wscratch0, fbits + 16);
    };
    EmitFromFixed<16, 64>(code, ctx, inst, convert);
}
template<>
void EmitIR<IR::Opcode::FPFixedU32ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // TODO: Consider fpr source
    EmitFromFixed<32, 32>(code, ctx, inst, [&](auto& Sdest, auto& Wsrc, u8 fbits) {
        // A zero fraction-bit count uses the two-operand UCVTF overload.
        if (fbits) {
            code.UCVTF(Sdest, Wsrc, fbits);
        } else {
            code.UCVTF(Sdest, Wsrc);
        }
    });
}
template<>
void EmitIR<IR::Opcode::FPFixedS32ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // TODO: Consider fpr source
    EmitFromFixed<32, 32>(code, ctx, inst, [&](auto& Sdest, auto& Wsrc, u8 fbits) {
        // A zero fraction-bit count uses the two-operand SCVTF overload.
        if (fbits) {
            code.SCVTF(Sdest, Wsrc, fbits);
        } else {
            code.SCVTF(Sdest, Wsrc);
        }
    });
}
template<>
void EmitIR<IR::Opcode::FPFixedU32ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitFromFixed<32, 64>(code, ctx, inst, [&](auto& Ddest, auto& Wsrc, u8 fbits) {
        // A zero fraction-bit count uses the two-operand UCVTF overload.
        if (fbits) {
            code.UCVTF(Ddest, Wsrc, fbits);
        } else {
            code.UCVTF(Ddest, Wsrc);
        }
    });
}
template<>
void EmitIR<IR::Opcode::FPFixedS32ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitFromFixed<32, 64>(code, ctx, inst, [&](auto& Ddest, auto& Wsrc, u8 fbits) {
        // A zero fraction-bit count uses the two-operand SCVTF overload.
        if (fbits) {
            code.SCVTF(Ddest, Wsrc, fbits);
        } else {
            code.SCVTF(Ddest, Wsrc);
        }
    });
}
template<>
void EmitIR<IR::Opcode::FPFixedU64ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // TODO: Consider fpr source
    EmitFromFixed<64, 64>(code, ctx, inst, [&](auto& Ddest, auto& Xsrc, u8 fbits) {
        // A zero fraction-bit count uses the two-operand UCVTF overload.
        if (fbits) {
            code.UCVTF(Ddest, Xsrc, fbits);
        } else {
            code.UCVTF(Ddest, Xsrc);
        }
    });
}
template<>
void EmitIR<IR::Opcode::FPFixedU64ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitFromFixed<64, 32>(code, ctx, inst, [&](auto& Sdest, auto& Xsrc, u8 fbits) {
        // A zero fraction-bit count uses the two-operand UCVTF overload.
        if (fbits) {
            code.UCVTF(Sdest, Xsrc, fbits);
        } else {
            code.UCVTF(Sdest, Xsrc);
        }
    });
}
template<>
void EmitIR<IR::Opcode::FPFixedS64ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    // TODO: Consider fpr source
    EmitFromFixed<64, 64>(code, ctx, inst, [&](auto& Ddest, auto& Xsrc, u8 fbits) {
        // A zero fraction-bit count uses the two-operand SCVTF overload.
        if (fbits) {
            code.SCVTF(Ddest, Xsrc, fbits);
        } else {
            code.SCVTF(Ddest, Xsrc);
        }
    });
}
template<>
void EmitIR<IR::Opcode::FPFixedS64ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitFromFixed<64, 32>(code, ctx, inst, [&](auto& Sdest, auto& Xsrc, u8 fbits) {
        // A zero fraction-bit count uses the two-operand SCVTF overload.
        if (fbits) {
            code.SCVTF(Sdest, Xsrc, fbits);
        } else {
            code.SCVTF(Sdest, Xsrc);
        }
    });
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,683 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/emit_arm64_memory.h"
#include <optional>
#include <utility>
#include <mcl/bit_cast.hpp>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fastmem.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/acc_type.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
namespace {
// Returns true when the access type is ORDERED, ORDEREDRW or LIMITEDORDERED,
// and false for every other access type.
bool IsOrdered(IR::AccType acctype) {
    switch (acctype) {
    case IR::AccType::ORDERED:
    case IR::AccType::ORDEREDRW:
    case IR::AccType::LIMITEDORDERED:
        return true;
    default:
        return false;
    }
}
// Maps an access width in bits (8/16/32/64/128) to the matching ReadMemory link target.
// Any other width reaches UNREACHABLE().
LinkTarget ReadMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::ReadMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::ReadMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::ReadMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::ReadMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::ReadMemory128;
    }
    UNREACHABLE();
}
// Maps an access width in bits (8/16/32/64/128) to the matching WriteMemory link target.
// Any other width reaches UNREACHABLE().
LinkTarget WriteMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::WriteMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::WriteMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::WriteMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::WriteMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::WriteMemory128;
    }
    UNREACHABLE();
}
// Maps an access width in bits (8/16/32/64/128) to the matching WrappedReadMemory link target.
// Any other width reaches UNREACHABLE().
LinkTarget WrappedReadMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::WrappedReadMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::WrappedReadMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::WrappedReadMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::WrappedReadMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::WrappedReadMemory128;
    }
    UNREACHABLE();
}
// Maps an access width in bits (8/16/32/64/128) to the matching WrappedWriteMemory link target.
// Any other width reaches UNREACHABLE().
LinkTarget WrappedWriteMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::WrappedWriteMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::WrappedWriteMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::WrappedWriteMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::WrappedWriteMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::WrappedWriteMemory128;
    }
    UNREACHABLE();
}
// Maps an access width in bits (8/16/32/64/128) to the matching ExclusiveReadMemory link target.
// Any other width reaches UNREACHABLE().
LinkTarget ExclusiveReadMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::ExclusiveReadMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::ExclusiveReadMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::ExclusiveReadMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::ExclusiveReadMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::ExclusiveReadMemory128;
    }
    UNREACHABLE();
}
// Maps an access width in bits (8/16/32/64/128) to the matching ExclusiveWriteMemory link target.
// Any other width reaches UNREACHABLE().
LinkTarget ExclusiveWriteMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::ExclusiveWriteMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::ExclusiveWriteMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::ExclusiveWriteMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::ExclusiveWriteMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::ExclusiveWriteMemory128;
    }
    UNREACHABLE();
}
// Emits a memory read that always goes through the ReadMemory<bitsize> link target.
// The result is taken from X0, or for 128-bit reads copied from Q0 into Q8.
// An ordered access type adds a DMB ISH after the call.
template<size_t bitsize>
void CallbackOnlyEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, operands[1]);
    const bool needs_barrier = IsOrdered(operands[2].GetImmediateAccType());

    EmitRelocation(code, ctx, ReadMemoryLinkTarget(bitsize));
    if (needs_barrier) {
        code.DMB(oaknut::BarrierOp::ISH);
    }

    if constexpr (bitsize != 128) {
        ctx.reg_alloc.DefineAsRegister(inst, X0);
    } else {
        code.MOV(Q8.B16(), Q0.B16());
        ctx.reg_alloc.DefineAsRegister(inst, Q8);
    }
}
// Emits an exclusive memory read through the ExclusiveReadMemory<bitsize> link target.
// Before the call, the byte at state_exclusive_state_offset in the JIT state is set to 1.
// The result is taken from X0, or for 128-bit reads copied from Q0 into Q8.
template<size_t bitsize>
void CallbackOnlyEmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, operands[1]);
    const bool needs_barrier = IsOrdered(operands[2].GetImmediateAccType());

    // Mark the exclusive state as set.
    code.MOV(Wscratch0, 1);
    code.STRB(Wscratch0, Xstate, ctx.conf.state_exclusive_state_offset);

    EmitRelocation(code, ctx, ExclusiveReadMemoryLinkTarget(bitsize));
    if (needs_barrier) {
        code.DMB(oaknut::BarrierOp::ISH);
    }

    if constexpr (bitsize != 128) {
        ctx.reg_alloc.DefineAsRegister(inst, X0);
    } else {
        code.MOV(Q8.B16(), Q0.B16());
        ctx.reg_alloc.DefineAsRegister(inst, Q8);
    }
}
// Emits a memory write that always goes through the WriteMemory<bitsize> link target.
// An ordered access type brackets the call with a DMB ISH before and after.
template<size_t bitsize>
void CallbackOnlyEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, operands[1], operands[2]);
    const bool needs_barrier = IsOrdered(operands[3].GetImmediateAccType());

    const auto emit_barrier_if_ordered = [&] {
        if (needs_barrier) {
            code.DMB(oaknut::BarrierOp::ISH);
        }
    };

    emit_barrier_if_ordered();
    EmitRelocation(code, ctx, WriteMemoryLinkTarget(bitsize));
    emit_barrier_if_ordered();
}
// Emits an exclusive memory write through the ExclusiveWriteMemory<bitsize> link target.
// W0 is preloaded with 1. If the exclusive-state byte in the JIT state is zero the call is
// skipped and that 1 is the result; otherwise the byte is cleared, the call is made and
// whatever it leaves in X0 is the result.
template<size_t bitsize>
void CallbackOnlyEmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.PrepareForCall({}, operands[1], operands[2]);
    const bool needs_barrier = IsOrdered(operands[3].GetImmediateAccType());

    const auto emit_barrier_if_ordered = [&] {
        if (needs_barrier) {
            code.DMB(oaknut::BarrierOp::ISH);
        }
    };

    oaknut::Label skip_write;

    emit_barrier_if_ordered();
    code.MOV(W0, 1);
    code.LDRB(Wscratch0, Xstate, ctx.conf.state_exclusive_state_offset);
    code.CBZ(Wscratch0, skip_write);
    code.STRB(WZR, Xstate, ctx.conf.state_exclusive_state_offset);
    EmitRelocation(code, ctx, ExclusiveWriteMemoryLinkTarget(bitsize));
    emit_barrier_if_ordered();
    code.l(skip_write);

    ctx.reg_alloc.DefineAsRegister(inst, X0);
}
// Page geometry used by the inline page-table path: 2^12-byte pages.
constexpr size_t page_bits = 12;
constexpr size_t page_size = size_t{1} << page_bits;
constexpr size_t page_mask = page_size - 1;
// Emits a check that branches to `fallback` for misaligned accesses.
// Nothing is emitted for 8-bit accesses, or when the bit for this width is not set in
// ctx.conf.detect_misaligned_access_via_page_table.
// This function may use Xscratch0 as a scratch register
// Trashes NZCV
template<size_t bitsize>
void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
    static_assert(bitsize == 8 || bitsize == 16 || bitsize == 32 || bitsize == 64 || bitsize == 128);

    const bool detection_enabled = bitsize != 8 && (ctx.conf.detect_misaligned_access_via_page_table & bitsize) != 0;
    if (!detection_enabled) {
        return;
    }

    if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
        // If (addr & page_mask) > page_size - byte_size, use fallback.
        code.AND(Xscratch0, Xaddr, page_mask);
        code.CMP(Xscratch0, page_size - bitsize / 8);
        code.B(HI, *fallback);
        return;
    }

    // Any set bit below the access size in bytes means the address is misaligned:
    // 16 -> 0b1, 32 -> 0b11, 64 -> 0b111, 128 -> 0b1111.
    const u64 align_mask = bitsize / 8 - 1;
    code.TST(Xaddr, align_mask);
    code.B(NE, *fallback);
}
// Emits the page-table lookup for a virtual address, branching to `fallback` when the
// address is out of range (unless mirroring is enabled), misaligned, or its entry is null.
// Outputs Xscratch0 = page_table[addr >> page_bits]
// May use Xscratch1 as scratch register
// Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
// Trashes NZCV
template<size_t bitsize>
std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
    const size_t index_bits = ctx.conf.page_table_address_space_bits - page_bits;
    const size_t top_bits_unused = 64 - ctx.conf.page_table_address_space_bits;
    const bool range_check_needed = !ctx.conf.silently_mirror_page_table && top_bits_unused != 0;

    EmitDetectMisalignedVAddr<bitsize>(code, ctx, Xaddr, fallback);

    if (range_check_needed) {
        // Page index with any bit at or above index_bits set -> fallback.
        code.LSR(Xscratch0, Xaddr, page_bits);
        code.TST(Xscratch0, u64(~u64(0)) << index_bits);
        code.B(NE, *fallback);
    } else {
        // Extract just the valid page-index bits.
        code.UBFX(Xscratch0, Xaddr, page_bits, index_bits);
    }

    // Load the 8-byte table entry for this page.
    code.LDR(Xscratch0, Xpagetable, Xscratch0, LSL, 3);

    if (ctx.conf.page_table_pointer_mask_bits != 0) {
        // Clear the low page_table_pointer_mask_bits bits of the entry.
        const u64 pointer_mask = u64(~u64(0)) << ctx.conf.page_table_pointer_mask_bits;
        code.AND(Xscratch0, Xscratch0, pointer_mask);
    }

    code.CBZ(Xscratch0, *fallback);

    if (!ctx.conf.absolute_offset_page_table) {
        code.AND(Xscratch1, Xaddr, page_mask);
        return std::make_pair(Xscratch0, Xscratch1);
    }
    return std::make_pair(Xscratch0, Xaddr);
}
// Emits a load of `bitsize` bits from [Xbase + Xoffset] into register number `value_idx`
// and returns the code position of the load instruction itself.
// With `ordered`, the address is first formed in Xscratch0 and an acquire load is used
// (for 128 bits: a plain LDR followed by DMB ISH). With `extend32`, the offset is taken
// as the zero-extended W view of Xoffset.
template<std::size_t bitsize>
CodePtr EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
    const auto index_ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
    const auto add_ext = extend32 ? oaknut::AddSubExt::UXTW : oaknut::AddSubExt::LSL;
    const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};

    CodePtr fastmem_location = code.xptr<CodePtr>();

    if (!ordered) {
        fastmem_location = code.xptr<CodePtr>();
        if constexpr (bitsize == 8) {
            code.LDRB(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
        } else if constexpr (bitsize == 16) {
            code.LDRH(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
        } else if constexpr (bitsize == 32) {
            code.LDR(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
        } else if constexpr (bitsize == 64) {
            code.LDR(oaknut::XReg{value_idx}, Xbase, Roffset, index_ext);
        } else if constexpr (bitsize == 128) {
            code.LDR(oaknut::QReg{value_idx}, Xbase, Roffset, index_ext);
        } else {
            ASSERT_FALSE("Invalid bitsize");
        }
        return fastmem_location;
    }

    // Ordered loads take a plain base register, so compute the address up front.
    code.ADD(Xscratch0, Xbase, Roffset, add_ext);

    // Record the position after the ADD so it points at the load itself.
    fastmem_location = code.xptr<CodePtr>();
    if constexpr (bitsize == 8) {
        code.LDARB(oaknut::WReg{value_idx}, Xscratch0);
    } else if constexpr (bitsize == 16) {
        code.LDARH(oaknut::WReg{value_idx}, Xscratch0);
    } else if constexpr (bitsize == 32) {
        code.LDAR(oaknut::WReg{value_idx}, Xscratch0);
    } else if constexpr (bitsize == 64) {
        code.LDAR(oaknut::XReg{value_idx}, Xscratch0);
    } else if constexpr (bitsize == 128) {
        code.LDR(oaknut::QReg{value_idx}, Xscratch0);
        code.DMB(oaknut::BarrierOp::ISH);
    } else {
        ASSERT_FALSE("Invalid bitsize");
    }
    return fastmem_location;
}
// Emits a store of `bitsize` bits from register number `value_idx` to [Xbase + Xoffset]
// and returns the code position recorded just before the store sequence.
// With `ordered`, the address is first formed in Xscratch0 and a release store is used
// (for 128 bits: a plain STR bracketed by DMB ISH). With `extend32`, the offset is taken
// as the zero-extended W view of Xoffset.
template<std::size_t bitsize>
CodePtr EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
    const auto index_ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
    const auto add_ext = extend32 ? oaknut::AddSubExt::UXTW : oaknut::AddSubExt::LSL;
    const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};

    CodePtr fastmem_location;

    if (!ordered) {
        fastmem_location = code.xptr<CodePtr>();
        if constexpr (bitsize == 8) {
            code.STRB(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
        } else if constexpr (bitsize == 16) {
            code.STRH(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
        } else if constexpr (bitsize == 32) {
            code.STR(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
        } else if constexpr (bitsize == 64) {
            code.STR(oaknut::XReg{value_idx}, Xbase, Roffset, index_ext);
        } else if constexpr (bitsize == 128) {
            code.STR(oaknut::QReg{value_idx}, Xbase, Roffset, index_ext);
        } else {
            ASSERT_FALSE("Invalid bitsize");
        }
        return fastmem_location;
    }

    // Ordered stores take a plain base register, so compute the address up front.
    code.ADD(Xscratch0, Xbase, Roffset, add_ext);

    // Record the position after the ADD (for 128 bits this is the leading DMB).
    fastmem_location = code.xptr<CodePtr>();
    if constexpr (bitsize == 8) {
        code.STLRB(oaknut::WReg{value_idx}, Xscratch0);
    } else if constexpr (bitsize == 16) {
        code.STLRH(oaknut::WReg{value_idx}, Xscratch0);
    } else if constexpr (bitsize == 32) {
        code.STLR(oaknut::WReg{value_idx}, Xscratch0);
    } else if constexpr (bitsize == 64) {
        code.STLR(oaknut::XReg{value_idx}, Xscratch0);
    } else if constexpr (bitsize == 128) {
        code.DMB(oaknut::BarrierOp::ISH);
        code.STR(oaknut::QReg{value_idx}, Xscratch0);
        code.DMB(oaknut::BarrierOp::ISH);
    } else {
        ASSERT_FALSE("Invalid bitsize");
    }
    return fastmem_location;
}
// Emits a memory read that resolves the address through the inline page-table lookup.
// Fast path: lookup followed by a direct load into the result register.
// Slow path (queued in ctx.deferred_emits, bound to `fallback`): calls the
// WrappedReadMemory<bitsize> link target and jumps back to `end`.
template<size_t bitsize>
void InlinePageTableEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
// Result register: a Q register for 128-bit reads, otherwise a W/X register
// (widths below 32 bits use a 32-bit register).
auto Rvalue = [&] {
if constexpr (bitsize == 128) {
return ctx.reg_alloc.WriteQ(inst);
} else {
return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
}
}();
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
// The lookup helpers are documented as trashing NZCV, so spill FPSR/flags state first.
ctx.fpsr.Spill();
ctx.reg_alloc.SpillFlags();
RegAlloc::Realize(Xaddr, Rvalue);
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
// Slow path. Registers are captured by value because the lambda runs later.
ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
code.l(*fallback);
// The address is passed to the wrapped call in Xscratch0.
code.MOV(Xscratch0, Xaddr);
EmitRelocation(code, ctx, WrappedReadMemoryLinkTarget(bitsize));
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
// The result comes back in Q0 (128-bit) or Xscratch0 (all other widths).
if constexpr (bitsize == 128) {
code.MOV(Rvalue.B16(), Q0.B16());
} else {
code.MOV(Rvalue.toX(), Xscratch0);
}
// NOTE(review): emit_check_memory_abort is a configured hook not visible here — confirm its contract.
ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
code.B(*end);
});
code.l(*end);
}
// Emits a memory write that resolves the address through the inline page-table lookup.
// Fast path: lookup followed by a direct store of the value register.
// Slow path (queued in ctx.deferred_emits, bound to `fallback`): calls the
// WrappedWriteMemory<bitsize> link target and jumps back to `end`.
template<size_t bitsize>
void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
// Value register: a Q register for 128-bit writes, otherwise a W/X register
// (widths below 32 bits use a 32-bit register).
auto Rvalue = [&] {
if constexpr (bitsize == 128) {
return ctx.reg_alloc.ReadQ(args[2]);
} else {
return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(args[2]);
}
}();
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
// The lookup helpers are documented as trashing NZCV, so spill FPSR/flags state first.
ctx.fpsr.Spill();
ctx.reg_alloc.SpillFlags();
RegAlloc::Realize(Xaddr, Rvalue);
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
// Slow path. Registers are captured by value because the lambda runs later.
ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
code.l(*fallback);
// Address goes in Xscratch0; the value goes in Q0 (128-bit) or Xscratch1 (other widths).
if constexpr (bitsize == 128) {
code.MOV(Xscratch0, Xaddr);
code.MOV(Q0.B16(), Rvalue.B16());
} else {
code.MOV(Xscratch0, Xaddr);
code.MOV(Xscratch1, Rvalue.toX());
}
// Ordered writes are bracketed by a DMB ISH on both sides of the call.
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
EmitRelocation(code, ctx, WrappedWriteMemoryLinkTarget(bitsize));
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
// NOTE(review): emit_check_memory_abort is a configured hook not visible here — confirm its contract.
ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
code.B(*end);
});
code.l(*end);
}
// Decides whether `inst` may use the fastmem path.
// Returns the (block location, instruction name) marker when fastmem is configured,
// supported, and ctx.fastmem approves this marker; std::nullopt otherwise.
std::optional<DoNotFastmemMarker> ShouldFastmem(EmitContext& ctx, IR::Inst* inst) {
    const bool fastmem_available = ctx.conf.fastmem_pointer && ctx.fastmem.SupportsFastmem();
    if (!fastmem_available) {
        return std::nullopt;
    }

    const auto marker = std::make_tuple(ctx.block.Location(), inst->GetName());
    if (!ctx.fastmem.ShouldFastmem(marker)) {
        return std::nullopt;
    }
    return marker;
}
// True only when the fastmem address space is 32 bits wide and fastmem mirroring is enabled.
inline bool ShouldExt32(EmitContext& ctx) {
    if (ctx.conf.fastmem_address_space_bits != 32) {
        return false;
    }
    return ctx.conf.silently_mirror_fastmem;
}
// Emits whatever address check or truncation the fastmem configuration requires and
// returns the (base, offset) register pair to access.
// May use Xscratch0 as scratch register
// Address to read/write = [ret0 + ret1], ret0 is always Xfastmem and ret1 is either Xaddr or Xscratch0
// Trashes NZCV
template<size_t bitsize>
std::pair<oaknut::XReg, oaknut::XReg> FastmemEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
    // Full 64-bit space, or the 32-bit mirrored case: nothing needs to be emitted.
    const bool address_used_as_is = ctx.conf.fastmem_address_space_bits == 64 || ShouldExt32(ctx);
    if (address_used_as_is) {
        return std::make_pair(Xfastmem, Xaddr);
    }

    if (!ctx.conf.silently_mirror_fastmem) {
        // Any bit set at or above the address-space width -> fallback.
        code.LSR(Xscratch0, Xaddr, ctx.conf.fastmem_address_space_bits);
        code.CBNZ(Xscratch0, *fallback);
        return std::make_pair(Xfastmem, Xaddr);
    }

    // Mirroring: keep only the low address-space bits.
    code.UBFX(Xscratch0, Xaddr, 0, ctx.conf.fastmem_address_space_bits);
    return std::make_pair(Xfastmem, Xscratch0);
}
// Emits a memory read through the fastmem path.
// Fast path: optional address check, then a direct load relative to Xfastmem.
// Slow path (queued in ctx.deferred_emits): registers fastmem patch info for the load,
// then calls the WrappedReadMemory<bitsize> link target and jumps back to `end`.
template<size_t bitsize>
void FastmemEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
// Result register: a Q register for 128-bit reads, otherwise a W/X register
// (widths below 32 bits use a 32-bit register).
auto Rvalue = [&] {
if constexpr (bitsize == 128) {
return ctx.reg_alloc.WriteQ(inst);
} else {
return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
}
}();
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
// The lookup helper is documented as trashing NZCV, so spill FPSR/flags state first.
ctx.fpsr.Spill();
ctx.reg_alloc.SpillFlags();
RegAlloc::Realize(Xaddr, Rvalue);
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
const auto [Xbase, Xoffset] = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
// fastmem_location is the code position of the load, used as the patch-info key below.
const auto fastmem_location = EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered, ShouldExt32(ctx));
// Slow path. Registers are captured by value because the lambda runs later.
ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end, fastmem_location] {
// Keyed by the load's offset from the block entry point; call_pc is the current
// emit position, i.e. the start of the slow path emitted just below.
// NOTE(review): presumably consumed by a fault handler to redirect execution — confirm.
ctx.ebi.fastmem_patch_info.emplace(
fastmem_location - ctx.ebi.entry_point,
FastmemPatchInfo{
.marker = marker,
.fc = FakeCall{
.call_pc = mcl::bit_cast<u64>(code.xptr<void*>()),
},
.recompile = ctx.conf.recompile_on_fastmem_failure,
});
code.l(*fallback);
// The address is passed to the wrapped call in Xscratch0.
code.MOV(Xscratch0, Xaddr);
EmitRelocation(code, ctx, WrappedReadMemoryLinkTarget(bitsize));
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
// The result comes back in Q0 (128-bit) or Xscratch0 (all other widths).
if constexpr (bitsize == 128) {
code.MOV(Rvalue.B16(), Q0.B16());
} else {
code.MOV(Rvalue.toX(), Xscratch0);
}
// NOTE(review): emit_check_memory_abort is a configured hook not visible here — confirm its contract.
ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
code.B(*end);
});
code.l(*end);
}
// Emits a memory write through the fastmem path.
// Fast path: optional address check, then a direct store relative to Xfastmem.
// Slow path (queued in ctx.deferred_emits): registers fastmem patch info for the store,
// then calls the WrappedWriteMemory<bitsize> link target and jumps back to `end`.
template<size_t bitsize>
void FastmemEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
// Value register: a Q register for 128-bit writes, otherwise a W/X register
// (widths below 32 bits use a 32-bit register).
auto Rvalue = [&] {
if constexpr (bitsize == 128) {
return ctx.reg_alloc.ReadQ(args[2]);
} else {
return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(args[2]);
}
}();
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
// The lookup helper is documented as trashing NZCV, so spill FPSR/flags state first.
ctx.fpsr.Spill();
ctx.reg_alloc.SpillFlags();
RegAlloc::Realize(Xaddr, Rvalue);
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
const auto [Xbase, Xoffset] = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
// fastmem_location is the code position returned by the store emitter, used as the patch-info key below.
const auto fastmem_location = EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered, ShouldExt32(ctx));
// Slow path. Registers are captured by value because the lambda runs later.
ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end, fastmem_location] {
// Keyed by the store's offset from the block entry point; call_pc is the current
// emit position, i.e. the start of the slow path emitted just below.
// NOTE(review): presumably consumed by a fault handler to redirect execution — confirm.
ctx.ebi.fastmem_patch_info.emplace(
fastmem_location - ctx.ebi.entry_point,
FastmemPatchInfo{
.marker = marker,
.fc = FakeCall{
.call_pc = mcl::bit_cast<u64>(code.xptr<void*>()),
},
.recompile = ctx.conf.recompile_on_fastmem_failure,
});
code.l(*fallback);
// Address goes in Xscratch0; the value goes in Q0 (128-bit) or Xscratch1 (other widths).
if constexpr (bitsize == 128) {
code.MOV(Xscratch0, Xaddr);
code.MOV(Q0.B16(), Rvalue.B16());
} else {
code.MOV(Xscratch0, Xaddr);
code.MOV(Xscratch1, Rvalue.toX());
}
// Ordered writes are bracketed by a DMB ISH on both sides of the call.
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
EmitRelocation(code, ctx, WrappedWriteMemoryLinkTarget(bitsize));
if (ordered) {
code.DMB(oaknut::BarrierOp::ISH);
}
// NOTE(review): emit_check_memory_abort is a configured hook not visible here — confirm its contract.
ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
code.B(*end);
});
code.l(*end);
}
} // namespace
// Picks the read strategy for `inst`: fastmem when ShouldFastmem yields a marker,
// else the inline page table when a page-table pointer is configured, else the callback.
template<size_t bitsize>
void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto marker = ShouldFastmem(ctx, inst);
    if (marker) {
        FastmemEmitReadMemory<bitsize>(code, ctx, inst, *marker);
        return;
    }

    if (ctx.conf.page_table_pointer != 0) {
        InlinePageTableEmitReadMemory<bitsize>(code, ctx, inst);
        return;
    }

    CallbackOnlyEmitReadMemory<bitsize>(code, ctx, inst);
}
// Exclusive reads have a single strategy: they always go through the callback.
template<size_t bitsize>
void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    return CallbackOnlyEmitExclusiveReadMemory<bitsize>(code, ctx, inst);
}
// Picks the write strategy for `inst`: fastmem when ShouldFastmem yields a marker,
// else the inline page table when a page-table pointer is configured, else the callback.
template<size_t bitsize>
void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto marker = ShouldFastmem(ctx, inst);
    if (marker) {
        FastmemEmitWriteMemory<bitsize>(code, ctx, inst, *marker);
        return;
    }

    if (ctx.conf.page_table_pointer != 0) {
        InlinePageTableEmitWriteMemory<bitsize>(code, ctx, inst);
        return;
    }

    CallbackOnlyEmitWriteMemory<bitsize>(code, ctx, inst);
}
// Exclusive writes have a single strategy: they always go through the callback.
template<size_t bitsize>
void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    return CallbackOnlyEmitExclusiveWriteMemory<bitsize>(code, ctx, inst);
}
// Explicit instantiations of the four memory emitters for every access width
// (8, 16, 32, 64 and 128 bits). The templates are defined in this file, so these
// give other translation units definitions to link against.
template void EmitReadMemory<8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitReadMemory<16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitReadMemory<32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitReadMemory<64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitReadMemory<128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveReadMemory<8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveReadMemory<16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveReadMemory<32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveReadMemory<64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveReadMemory<128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitWriteMemory<8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitWriteMemory<16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitWriteMemory<32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitWriteMemory<64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitWriteMemory<128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveWriteMemory<8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveWriteMemory<16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveWriteMemory<32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveWriteMemory<64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
template void EmitExclusiveWriteMemory<128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,35 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/common/common_types.h"
// Forward declarations of the oaknut types named in this header, so the full
// oaknut header does not have to be included here.
namespace oaknut {
struct CodeGenerator;
struct Label;
} // namespace oaknut
// Forward declarations of the IR types named in this header.
namespace Dynarmic::IR {
enum class AccType;
class Inst;
} // namespace Dynarmic::IR
// Declarations of the arm64 memory-access emitters. Each is a function template
// parameterised on the access width in bits; only declarations appear in this header.
namespace Dynarmic::Backend::Arm64 {
struct EmitContext;
enum class LinkTarget;
// Emits code for a memory read of `bitsize` bits for `inst`.
template<size_t bitsize>
void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
// Emits code for an exclusive memory read of `bitsize` bits for `inst`.
template<size_t bitsize>
void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
// Emits code for a memory write of `bitsize` bits for `inst`.
template<size_t bitsize>
void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
// Emits code for an exclusive memory write of `bitsize` bits for `inst`.
template<size_t bitsize>
void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,409 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Shared scaffolding for two-operand packed operations: allocates a D-register result
// and two D-register inputs, realizes them, then hands all three to `emit`.
template<typename EmitFn>
static void EmitPackedOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdest = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vdest, Vlhs, Vrhs);

    emit(Vdest, Vlhs, Vrhs);
}
// Same scaffolding as a plain packed operation, but spills the FPSR manager's state
// after the registers are realized and before `emit` runs.
template<typename EmitFn>
static void EmitSaturatedPackedOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdest = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vdest, Vlhs, Vrhs);

    ctx.fpsr.Spill();
    emit(Vdest, Vlhs, Vrhs);
}
template<>
void EmitIR<IR::Opcode::PackedAddU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vsum = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vsum, Vlhs, Vrhs);

    // Lane-wise 8-bit add.
    code.ADD(Vsum->B8(), Vlhs->B8(), Vrhs->B8());

    if (!ge_inst) {
        return;
    }

    // GE lanes: set where the first operand is unsigned-higher than the sum,
    // i.e. where the 8-bit add wrapped.
    auto Vge = ctx.reg_alloc.WriteD(ge_inst);
    RegAlloc::Realize(Vge);
    code.CMHI(Vge->B8(), Vlhs->B8(), Vsum->B8());
}
template<>
void EmitIR<IR::Opcode::PackedAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vsum = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vsum, Vlhs, Vrhs);

    // Lane-wise 8-bit add.
    code.ADD(Vsum->B8(), Vlhs->B8(), Vrhs->B8());

    if (!ge_inst) {
        return;
    }

    // GE lanes: set where the signed halving add of the operands is >= 0.
    auto Vge = ctx.reg_alloc.WriteD(ge_inst);
    RegAlloc::Realize(Vge);
    code.SHADD(Vge->B8(), Vlhs->B8(), Vrhs->B8());
    code.CMGE(Vge->B8(), Vge->B8(), 0);
}
template<>
void EmitIR<IR::Opcode::PackedSubU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdiff = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vdiff, Vlhs, Vrhs);

    // Lane-wise 8-bit subtract.
    code.SUB(Vdiff->B8(), Vlhs->B8(), Vrhs->B8());

    if (!ge_inst) {
        return;
    }

    // GE lanes: set where the unsigned halving subtract, read as signed, is >= 0.
    auto Vge = ctx.reg_alloc.WriteD(ge_inst);
    RegAlloc::Realize(Vge);
    code.UHSUB(Vge->B8(), Vlhs->B8(), Vrhs->B8());
    code.CMGE(Vge->B8(), Vge->B8(), 0);
}
template<>
void EmitIR<IR::Opcode::PackedSubS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdiff = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vdiff, Vlhs, Vrhs);

    // Lane-wise 8-bit subtract.
    code.SUB(Vdiff->B8(), Vlhs->B8(), Vrhs->B8());

    if (!ge_inst) {
        return;
    }

    // GE lanes: set where the signed halving subtract of the operands is >= 0.
    auto Vge = ctx.reg_alloc.WriteD(ge_inst);
    RegAlloc::Realize(Vge);
    code.SHSUB(Vge->B8(), Vlhs->B8(), Vrhs->B8());
    code.CMGE(Vge->B8(), Vge->B8(), 0);
}
template<>
void EmitIR<IR::Opcode::PackedAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vsum = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vsum, Vlhs, Vrhs);

    // Lane-wise 16-bit add.
    code.ADD(Vsum->H4(), Vlhs->H4(), Vrhs->H4());

    if (!ge_inst) {
        return;
    }

    // GE lanes: set where the first operand is unsigned-higher than the sum,
    // i.e. where the 16-bit add wrapped.
    auto Vge = ctx.reg_alloc.WriteD(ge_inst);
    RegAlloc::Realize(Vge);
    code.CMHI(Vge->H4(), Vlhs->H4(), Vsum->H4());
}
template<>
void EmitIR<IR::Opcode::PackedAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vsum = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vsum, Vlhs, Vrhs);

    // Lane-wise 16-bit add.
    code.ADD(Vsum->H4(), Vlhs->H4(), Vrhs->H4());

    if (!ge_inst) {
        return;
    }

    // GE lanes: set where the signed halving add of the operands is >= 0.
    auto Vge = ctx.reg_alloc.WriteD(ge_inst);
    RegAlloc::Realize(Vge);
    code.SHADD(Vge->H4(), Vlhs->H4(), Vrhs->H4());
    code.CMGE(Vge->H4(), Vge->H4(), 0);
}
template<>
void EmitIR<IR::Opcode::PackedSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdiff = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vdiff, Vlhs, Vrhs);

    // Lane-wise 16-bit subtract.
    code.SUB(Vdiff->H4(), Vlhs->H4(), Vrhs->H4());

    if (!ge_inst) {
        return;
    }

    // GE lanes: set where the unsigned halving subtract, read as signed, is >= 0.
    auto Vge = ctx.reg_alloc.WriteD(ge_inst);
    RegAlloc::Realize(Vge);
    code.UHSUB(Vge->H4(), Vlhs->H4(), Vrhs->H4());
    code.CMGE(Vge->H4(), Vge->H4(), 0);
}
template<>
void EmitIR<IR::Opcode::PackedSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Vdiff = ctx.reg_alloc.WriteD(inst);
    auto Vlhs = ctx.reg_alloc.ReadD(operands[0]);
    auto Vrhs = ctx.reg_alloc.ReadD(operands[1]);
    RegAlloc::Realize(Vdiff, Vlhs, Vrhs);

    // Lane-wise 16-bit subtract.
    code.SUB(Vdiff->H4(), Vlhs->H4(), Vrhs->H4());

    if (!ge_inst) {
        return;
    }

    // GE lanes: set where the signed halving subtract of the operands is >= 0.
    auto Vge = ctx.reg_alloc.WriteD(ge_inst);
    RegAlloc::Realize(Vge);
    code.SHSUB(Vge->H4(), Vlhs->H4(), Vrhs->H4());
    code.CMGE(Vge->H4(), Vge->H4(), 0);
}
// Shared emitter for the exchanging packed 16-bit add/sub opcodes.
// The operands are widened to 32-bit lanes, the two low lanes of the second operand are
// swapped, and one lane is negated so that a single SUB yields an add in one lane and a
// subtract in the other. `add_is_hi` selects which lane adds, `is_signed` selects sign-
// or zero-extension, and `is_halving` shifts the 32-bit results right by one.
// Writes V0, V1 and V2 directly as temporaries.
// NOTE(review): assumes V0-V2 are never handed out by the register allocator — confirm.
template<bool add_is_hi, bool is_signed, bool is_halving>
static void EmitPackedAddSub(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Vresult = ctx.reg_alloc.WriteD(inst);
auto Va = ctx.reg_alloc.ReadD(args[0]);
auto Vb = ctx.reg_alloc.ReadD(args[1]);
RegAlloc::Realize(Vresult, Va, Vb);
// Widen each 16-bit lane to 32 bits: V0 = a, V1 = b.
if (is_signed) {
code.SXTL(V0.S4(), Va->H4());
code.SXTL(V1.S4(), Vb->H4());
} else {
code.UXTL(V0.S4(), Va->H4());
code.UXTL(V1.S4(), Vb->H4());
}
// Rotate the low 8 bytes of V1 by 4 bytes, swapping its two low 32-bit lanes.
code.EXT(V1.B8(), V1.B8(), V1.B8(), 4);
// D2 = per-lane mask; presumably each set bit of the pattern expands to an all-ones byte,
// making the high (add_is_hi) or low 32-bit lane all ones — confirm against oaknut RepImm.
code.MOVI(D2, oaknut::RepImm{add_is_hi ? 0b11110000 : 0b00001111});
// (x ^ mask) - mask negates x where mask is all ones and leaves it unchanged where it is zero.
code.EOR(V1.B8(), V1.B8(), V2.B8());
code.SUB(V1.S2(), V1.S2(), V2.S2());
// a - (-b) = a + b in the masked lane, a - b in the other lane.
code.SUB(Vresult->S2(), V0.S2(), V1.S2());
if (is_halving) {
if (is_signed) {
code.SSHR(Vresult->S2(), Vresult->S2(), 1);
} else {
code.USHR(Vresult->S2(), Vresult->S2(), 1);
}
}
if (ge_inst) {
// GE flags are never requested for the halving variants.
ASSERT(!is_halving);
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
RegAlloc::Realize(Vge);
if (is_signed) {
// Signed: GE where the 32-bit result is >= 0, narrowed to 16-bit lanes.
code.CMGE(Vge->S2(), Vresult->S2(), 0);
code.XTN(Vge->H4(), Vge->toQ().S4());
} else {
// Unsigned: tests each halfword of the 32-bit results against zero, flips the
// add lane via the V2 mask, then keeps the upper halfword of each 32-bit lane.
// NOTE(review): presumably carry-out for the add lane and no-borrow for the sub lane — confirm.
code.CMEQ(Vge->H4(), Vresult->H4(), 0);
code.EOR(Vge->B8(), Vge->B8(), V2.B8());
code.SHRN(Vge->H4(), Vge->toQ().S4(), 16);
}
}
// Narrow the 32-bit results back down to 16-bit lanes.
code.XTN(Vresult->H4(), Vresult->toQ().S4());
}
// Unsigned exchanging add/subtract: the add lands in the high lane, no halving.
template<>
void EmitIR<IR::Opcode::PackedAddSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr bool add_is_hi = true;
    constexpr bool is_signed = false;
    constexpr bool is_halving = false;
    EmitPackedAddSub<add_is_hi, is_signed, is_halving>(code, ctx, inst);
}
// Signed exchanging add/subtract: the add lands in the high lane, no halving.
template<>
void EmitIR<IR::Opcode::PackedAddSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr bool add_is_hi = true;
    constexpr bool is_signed = true;
    constexpr bool is_halving = false;
    EmitPackedAddSub<add_is_hi, is_signed, is_halving>(code, ctx, inst);
}
// Unsigned exchanging subtract/add: the add lands in the low lane, no halving.
template<>
void EmitIR<IR::Opcode::PackedSubAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr bool add_is_hi = false;
    constexpr bool is_signed = false;
    constexpr bool is_halving = false;
    EmitPackedAddSub<add_is_hi, is_signed, is_halving>(code, ctx, inst);
}
// Signed exchanging subtract/add: the add lands in the low lane, no halving.
template<>
void EmitIR<IR::Opcode::PackedSubAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr bool add_is_hi = false;
    constexpr bool is_signed = true;
    constexpr bool is_halving = false;
    EmitPackedAddSub<add_is_hi, is_signed, is_halving>(code, ctx, inst);
}
// Unsigned halving add on byte lanes: one UHADD over the 8B arrangement.
template<>
void EmitIR<IR::Opcode::PackedHalvingAddU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.UHADD(Vd->B8(), Vn->B8(), Vm->B8());
    };
    EmitPackedOp(code, ctx, inst, emit_lanes);
}
// Signed halving add on byte lanes: one SHADD over the 8B arrangement.
template<>
void EmitIR<IR::Opcode::PackedHalvingAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.SHADD(Vd->B8(), Vn->B8(), Vm->B8());
    };
    EmitPackedOp(code, ctx, inst, emit_lanes);
}
// Unsigned halving subtract on byte lanes: one UHSUB over the 8B arrangement.
template<>
void EmitIR<IR::Opcode::PackedHalvingSubU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.UHSUB(Vd->B8(), Vn->B8(), Vm->B8());
    };
    EmitPackedOp(code, ctx, inst, emit_lanes);
}
// Signed halving subtract on byte lanes: one SHSUB over the 8B arrangement.
template<>
void EmitIR<IR::Opcode::PackedHalvingSubS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.SHSUB(Vd->B8(), Vn->B8(), Vm->B8());
    };
    EmitPackedOp(code, ctx, inst, emit_lanes);
}
// Unsigned halving add on halfword lanes: one UHADD over the 4H arrangement.
template<>
void EmitIR<IR::Opcode::PackedHalvingAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.UHADD(Vd->H4(), Vn->H4(), Vm->H4());
    };
    EmitPackedOp(code, ctx, inst, emit_lanes);
}
// Signed halving add on halfword lanes: one SHADD over the 4H arrangement.
template<>
void EmitIR<IR::Opcode::PackedHalvingAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.SHADD(Vd->H4(), Vn->H4(), Vm->H4());
    };
    EmitPackedOp(code, ctx, inst, emit_lanes);
}
// Unsigned halving subtract on halfword lanes: one UHSUB over the 4H arrangement.
template<>
void EmitIR<IR::Opcode::PackedHalvingSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.UHSUB(Vd->H4(), Vn->H4(), Vm->H4());
    };
    EmitPackedOp(code, ctx, inst, emit_lanes);
}
// Signed halving subtract on halfword lanes: one SHSUB over the 4H arrangement.
template<>
void EmitIR<IR::Opcode::PackedHalvingSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.SHSUB(Vd->H4(), Vn->H4(), Vm->H4());
    };
    EmitPackedOp(code, ctx, inst, emit_lanes);
}
// Unsigned halving exchanging add/subtract: the add lands in the high lane.
template<>
void EmitIR<IR::Opcode::PackedHalvingAddSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr bool add_is_hi = true;
    constexpr bool is_signed = false;
    constexpr bool is_halving = true;
    EmitPackedAddSub<add_is_hi, is_signed, is_halving>(code, ctx, inst);
}
// Signed halving exchanging add/subtract: the add lands in the high lane.
template<>
void EmitIR<IR::Opcode::PackedHalvingAddSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr bool add_is_hi = true;
    constexpr bool is_signed = true;
    constexpr bool is_halving = true;
    EmitPackedAddSub<add_is_hi, is_signed, is_halving>(code, ctx, inst);
}
// Unsigned halving exchanging subtract/add: the add lands in the low lane.
template<>
void EmitIR<IR::Opcode::PackedHalvingSubAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr bool add_is_hi = false;
    constexpr bool is_signed = false;
    constexpr bool is_halving = true;
    EmitPackedAddSub<add_is_hi, is_signed, is_halving>(code, ctx, inst);
}
// Signed halving exchanging subtract/add: the add lands in the low lane.
template<>
void EmitIR<IR::Opcode::PackedHalvingSubAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr bool add_is_hi = false;
    constexpr bool is_signed = true;
    constexpr bool is_halving = true;
    EmitPackedAddSub<add_is_hi, is_signed, is_halving>(code, ctx, inst);
}
// Unsigned saturating add on byte lanes: one UQADD over the 8B arrangement.
template<>
void EmitIR<IR::Opcode::PackedSaturatedAddU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.UQADD(Vd->B8(), Vn->B8(), Vm->B8());
    };
    EmitSaturatedPackedOp(code, ctx, inst, emit_lanes);
}
// Signed saturating add on byte lanes: one SQADD over the 8B arrangement.
template<>
void EmitIR<IR::Opcode::PackedSaturatedAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.SQADD(Vd->B8(), Vn->B8(), Vm->B8());
    };
    EmitSaturatedPackedOp(code, ctx, inst, emit_lanes);
}
// Unsigned saturating subtract on byte lanes: one UQSUB over the 8B arrangement.
template<>
void EmitIR<IR::Opcode::PackedSaturatedSubU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.UQSUB(Vd->B8(), Vn->B8(), Vm->B8());
    };
    EmitSaturatedPackedOp(code, ctx, inst, emit_lanes);
}
// Signed saturating subtract on byte lanes: one SQSUB over the 8B arrangement.
template<>
void EmitIR<IR::Opcode::PackedSaturatedSubS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.SQSUB(Vd->B8(), Vn->B8(), Vm->B8());
    };
    EmitSaturatedPackedOp(code, ctx, inst, emit_lanes);
}
// Unsigned saturating add on halfword lanes: one UQADD over the 4H arrangement.
template<>
void EmitIR<IR::Opcode::PackedSaturatedAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.UQADD(Vd->H4(), Vn->H4(), Vm->H4());
    };
    EmitSaturatedPackedOp(code, ctx, inst, emit_lanes);
}
// Signed saturating add on halfword lanes: one SQADD over the 4H arrangement.
template<>
void EmitIR<IR::Opcode::PackedSaturatedAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.SQADD(Vd->H4(), Vn->H4(), Vm->H4());
    };
    EmitSaturatedPackedOp(code, ctx, inst, emit_lanes);
}
// Unsigned saturating subtract on halfword lanes: one UQSUB over the 4H arrangement.
template<>
void EmitIR<IR::Opcode::PackedSaturatedSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.UQSUB(Vd->H4(), Vn->H4(), Vm->H4());
    };
    EmitSaturatedPackedOp(code, ctx, inst, emit_lanes);
}
// Signed saturating subtract on halfword lanes: one SQSUB over the 4H arrangement.
template<>
void EmitIR<IR::Opcode::PackedSaturatedSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_lanes = [&](auto& Vd, auto& Vn, auto& Vm) {
        code.SQSUB(Vd->H4(), Vn->H4(), Vm->H4());
    };
    EmitSaturatedPackedOp(code, ctx, inst, emit_lanes);
}
// Sum of absolute byte differences. Only the low four byte lanes contribute:
// the other lanes of the absolute-difference vector are masked off before the
// across-lanes add. V2 is used as a scratch register for the mask.
template<>
void EmitIR<IR::Opcode::PackedAbsDiffSumU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_sad = [&](auto& Vsum, auto& Vlhs, auto& Vrhs) {
        // RepImm expands each immediate bit into a whole byte: low four bytes = 0xFF.
        code.MOVI(D2, oaknut::RepImm{0b00001111});
        code.UABD(Vsum->B8(), Vlhs->B8(), Vrhs->B8());
        code.AND(Vsum->B8(), Vsum->B8(), V2.B8());  // TODO: Zext tracking
        code.UADDLV(Vsum->toH(), Vsum->B8());
    };
    EmitPackedOp(code, ctx, inst, emit_sad);
}
// Bitwise select driven by a GE mask (args[0]): bits set in the mask take the
// corresponding bit of args[2], clear bits take it from args[1].
template<>
void EmitIR<IR::Opcode::PackedSelect>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Vout = ctx.reg_alloc.WriteD(inst);
    auto Vmask = ctx.reg_alloc.ReadD(operands[0]);
    auto Vif_clear = ctx.reg_alloc.ReadD(operands[1]);
    auto Vif_set = ctx.reg_alloc.ReadD(operands[2]);
    RegAlloc::Realize(Vout, Vmask, Vif_clear, Vif_set);

    // BSL consumes its destination as the selector, so seed it with the mask first.
    code.FMOV(Vout, Vmask);  // TODO: Move elimination
    code.BSL(Vout->B8(), Vif_set->B8(), Vif_clear->B8());
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,273 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// 32-bit signed saturating add that also reports whether saturation occurred.
// A GetOverflowFromOp pseudo-operation must be attached to the instruction.
template<>
void EmitIR<IR::Opcode::SignedSaturatedAddWithFlag32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
    ASSERT(overflow_inst);

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wsum = ctx.reg_alloc.WriteW(inst);
    auto Wlhs = ctx.reg_alloc.ReadW(operands[0]);
    auto Wrhs = ctx.reg_alloc.ReadW(operands[1]);
    auto Wflag = ctx.reg_alloc.WriteW(overflow_inst);
    RegAlloc::Realize(Wsum, Wlhs, Wrhs, Wflag);
    // The sequence below overwrites NZCV.
    ctx.reg_alloc.SpillFlags();

    code.ADDS(Wsum, *Wlhs, Wrhs);
    // Build the saturated value from the wrapped result's sign:
    // sign-fill (0 or 0xFFFFFFFF) XOR 0x80000000 gives 0x80000000 or 0x7FFFFFFF.
    code.ASR(Wscratch0, Wsum, 31);
    code.EOR(Wscratch0, Wscratch0, 0x8000'0000);
    // Keep the plain sum unless the V flag reports signed overflow.
    code.CSEL(Wsum, Wsum, Wscratch0, VC);
    code.CSET(Wflag, VS);
}
// 32-bit signed saturating subtract that also reports whether saturation occurred.
// A GetOverflowFromOp pseudo-operation must be attached to the instruction.
template<>
void EmitIR<IR::Opcode::SignedSaturatedSubWithFlag32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
    ASSERT(overflow_inst);

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wdiff = ctx.reg_alloc.WriteW(inst);
    auto Wlhs = ctx.reg_alloc.ReadW(operands[0]);
    auto Wrhs = ctx.reg_alloc.ReadW(operands[1]);
    auto Wflag = ctx.reg_alloc.WriteW(overflow_inst);
    RegAlloc::Realize(Wdiff, Wlhs, Wrhs, Wflag);
    // The sequence below overwrites NZCV.
    ctx.reg_alloc.SpillFlags();

    code.SUBS(Wdiff, *Wlhs, Wrhs);
    // Build the saturated value from the wrapped result's sign:
    // sign-fill (0 or 0xFFFFFFFF) XOR 0x80000000 gives 0x80000000 or 0x7FFFFFFF.
    code.ASR(Wscratch0, Wdiff, 31);
    code.EOR(Wscratch0, Wscratch0, 0x8000'0000);
    // Keep the plain difference unless the V flag reports signed overflow.
    code.CSEL(Wdiff, Wdiff, Wscratch0, VC);
    code.CSET(Wflag, VS);
}
// Clamp a 32-bit value to the range of an N-bit signed integer, where N is the
// immediate in args[1] (1..32). Optionally reports whether clamping changed the value.
template<>
void EmitIR<IR::Opcode::SignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_user = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const size_t N = args[1].GetImmediateU8();
    ASSERT(N >= 1 && N <= 32);

    if (N == 32) {
        // Saturating to the full register width is the identity and never overflows.
        ctx.reg_alloc.DefineAsExisting(inst, args[0]);
        if (overflow_user) {
            auto Wflag = ctx.reg_alloc.WriteW(overflow_user);
            RegAlloc::Realize(Wflag);
            code.MOV(*Wflag, WZR);
        }
        return;
    }

    const u32 upper_bound = (1u << (N - 1)) - 1;
    const u32 lower_bound = ~u32{0} << (N - 1);

    auto Winput = ctx.reg_alloc.ReadW(args[0]);
    auto Wclamped = ctx.reg_alloc.WriteW(inst);
    RegAlloc::Realize(Winput, Wclamped);
    // The compares below overwrite NZCV.
    ctx.reg_alloc.SpillFlags();

    code.MOV(Wscratch0, lower_bound);
    code.MOV(Wscratch1, upper_bound);
    // Clamp from below, then from above.
    code.CMP(*Winput, Wscratch0);
    code.CSEL(Wclamped, Winput, Wscratch0, GT);
    code.CMP(*Winput, Wscratch1);
    code.CSEL(Wclamped, Wclamped, Wscratch1, LT);

    if (overflow_user) {
        // Saturation happened exactly when the clamped value differs from the input.
        auto Wflag = ctx.reg_alloc.WriteW(overflow_user);
        RegAlloc::Realize(Wflag);
        code.CMP(*Wclamped, Winput);
        code.CSET(Wflag, NE);
    }
}
// Clamp a signed 32-bit value to the range [0, 2^N - 1], where N is the immediate
// in args[1] (at most 31). Optionally reports whether clamping changed the value.
template<>
void EmitIR<IR::Opcode::UnsignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto overflow_user = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Wclamped = ctx.reg_alloc.WriteW(inst);
    auto Winput = ctx.reg_alloc.ReadW(operands[0]);
    RegAlloc::Realize(Wclamped, Winput);
    // The compares below overwrite NZCV.
    ctx.reg_alloc.SpillFlags();

    const size_t N = operands[1].GetImmediateU8();
    ASSERT(N <= 31);
    const u32 upper_bound = (1u << N) - 1;

    code.MOV(Wscratch0, upper_bound);
    // Clamp negative inputs to zero, then clamp from above.
    code.CMP(*Winput, 0);
    code.CSEL(Wclamped, Winput, WZR, GT);
    code.CMP(*Winput, Wscratch0);
    code.CSEL(Wclamped, Wclamped, Wscratch0, LT);

    if (overflow_user) {
        // Reuses the flags of the last compare: an unsigned "higher" result covers
        // both inputs above the bound and negative inputs (large when read unsigned).
        auto Wflag = ctx.reg_alloc.WriteW(overflow_user);
        RegAlloc::Realize(Wflag);
        code.CSET(Wflag, HI);
    }
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd8>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd32>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd64>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh32>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedSub8>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedSub16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedSub32>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::SignedSaturatedSub64>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd8>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd32>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd64>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedSub8>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedSub16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedSub32>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedSub64>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
} // namespace Dynarmic::Backend::Arm64

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,791 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/bit_cast.hpp>
#include <mcl/mp/metavalue/lift_value.hpp>
#include <mcl/mp/typelist/cartesian_product.hpp>
#include <mcl/mp/typelist/get.hpp>
#include <mcl/mp/typelist/lift_sequence.hpp>
#include <mcl/mp/typelist/list.hpp>
#include <mcl/mp/typelist/lower_to_tuple.hpp>
#include <mcl/type_traits/function_info.hpp>
#include <mcl/type_traits/integer_of_size.hpp>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/a64_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/common/always_false.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/info.h"
#include "dynarmic/common/fp/op.h"
#include "dynarmic/common/fp/rounding_mode.h"
#include "dynarmic/common/lut_from_list.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
namespace mp = mcl::mp;
// Full vector width in bits (128), wrapped as a compile-time constant type.
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
// Array alias that always sizes itself according to the given type T
// relative to the size of a vector register. e.g. T = u32 would result
// in a std::array<u32, 4>.
template<typename T>
using VectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T>>;
// Runs `emit` with the FPCR value selected by `fpcr_controlled` in effect.
// If that value already equals the context's current FPCR, `emit` is simply invoked;
// otherwise the host FPCR is switched before the call and switched back afterwards.
// Wscratch0/Xscratch0 are used to stage the FPCR value.
template<typename EmitFn>
static void MaybeStandardFPSCRValue(oaknut::CodeGenerator& code, EmitContext& ctx, bool fpcr_controlled, EmitFn emit) {
    const bool needs_switch = ctx.FPCR(fpcr_controlled) != ctx.FPCR();
    if (!needs_switch) {
        emit();
        return;
    }

    // Install the requested FPCR for the duration of the emitted code.
    code.MOV(Wscratch0, ctx.FPCR(fpcr_controlled).Value());
    code.MSR(oaknut::SystemReg::FPCR, Xscratch0);

    emit();

    // Put the context's regular FPCR back.
    code.MOV(Wscratch0, ctx.FPCR().Value());
    code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
}
// Common scaffolding for one-input vector FP operations.
// Allocates the result and operand Q registers, then calls emit(Qresult, Qoperand)
// under the FPCR chosen by the instruction's second argument.
template<typename EmitFn>
static void EmitTwoOp(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qdest = ctx.reg_alloc.WriteQ(inst);
    auto Qsrc = ctx.reg_alloc.ReadQ(operands[0]);
    // A void second argument is treated the same as an explicit `true`.
    const bool fpcr_controlled = operands[1].IsVoid() || operands[1].GetImmediateU1();
    RegAlloc::Realize(Qdest, Qsrc);
    ctx.fpsr.Load();

    const auto emit_body = [&] {
        emit(Qdest, Qsrc);
    };
    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, emit_body);
}
// EmitTwoOp variant that hands `emit` the registers already arranged for the
// element width `size` (16 -> 8H, 32 -> 4S, 64 -> 2D). Other widths fail to compile.
template<size_t size, typename EmitFn>
static void EmitTwoOpArranged(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    const auto emit_arranged = [&](auto& Qdest, auto& Qsrc) {
        if constexpr (size == 64) {
            emit(Qdest->D2(), Qsrc->D2());
        } else if constexpr (size == 32) {
            emit(Qdest->S4(), Qsrc->S4());
        } else if constexpr (size == 16) {
            emit(Qdest->H8(), Qsrc->H8());
        } else {
            static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
        }
    };
    EmitTwoOp(code, ctx, inst, emit_arranged);
}
// Common scaffolding for two-input vector FP operations.
// Allocates the result and both operand Q registers, then calls
// emit(Qresult, Qa, Qb) under the FPCR chosen by the instruction's third argument.
template<typename EmitFn>
static void EmitThreeOp(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qdest = ctx.reg_alloc.WriteQ(inst);
    auto Qlhs = ctx.reg_alloc.ReadQ(operands[0]);
    auto Qrhs = ctx.reg_alloc.ReadQ(operands[1]);
    const bool fpcr_controlled = operands[2].GetImmediateU1();
    RegAlloc::Realize(Qdest, Qlhs, Qrhs);
    ctx.fpsr.Load();

    const auto emit_body = [&] {
        emit(Qdest, Qlhs, Qrhs);
    };
    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, emit_body);
}
// EmitThreeOp variant that hands `emit` the registers already arranged for the
// element width `size` (16 -> 8H, 32 -> 4S, 64 -> 2D). Other widths fail to compile.
template<size_t size, typename EmitFn>
static void EmitThreeOpArranged(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    const auto emit_arranged = [&](auto& Qdest, auto& Qlhs, auto& Qrhs) {
        if constexpr (size == 64) {
            emit(Qdest->D2(), Qlhs->D2(), Qrhs->D2());
        } else if constexpr (size == 32) {
            emit(Qdest->S4(), Qlhs->S4(), Qrhs->S4());
        } else if constexpr (size == 16) {
            emit(Qdest->H8(), Qlhs->H8(), Qrhs->H8());
        } else {
            static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
        }
    };
    EmitThreeOp(code, ctx, inst, emit_arranged);
}
// Scaffolding for vector fused multiply-accumulate.
// args[0] is both read and written (the accumulator); args[1] and args[2] are the
// multiplicands; args[3] selects the FPCR. `emit` receives the three registers
// arranged for the element width `size` (16 -> 8H, 32 -> 4S, 64 -> 2D).
template<size_t size, typename EmitFn>
static void EmitFMA(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qacc = ctx.reg_alloc.ReadWriteQ(operands[0], inst);
    auto Qmul1 = ctx.reg_alloc.ReadQ(operands[1]);
    auto Qmul2 = ctx.reg_alloc.ReadQ(operands[2]);
    const bool fpcr_controlled = operands[3].GetImmediateU1();
    RegAlloc::Realize(Qacc, Qmul1, Qmul2);
    ctx.fpsr.Load();

    const auto emit_body = [&] {
        if constexpr (size == 64) {
            emit(Qacc->D2(), Qmul1->D2(), Qmul2->D2());
        } else if constexpr (size == 32) {
            emit(Qacc->S4(), Qmul1->S4(), Qmul2->S4());
        } else if constexpr (size == 16) {
            emit(Qacc->H8(), Qmul1->H8(), Qmul2->H8());
        } else {
            static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
        }
    };
    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, emit_body);
}
// Scaffolding for vector fixed-point/integer to floating-point conversion.
//
//   args[0] - source vector
//   args[1] - number of fractional bits (0 means a plain integer conversion)
//   args[2] - rounding mode; must match the rounding mode of the FPCR that will be in effect
//   args[3] - whether the conversion is FPCR-controlled
//
// `emit` receives (Vto, Vfrom, fbits) with the registers arranged for the element
// width `size` (32 -> 4S, 64 -> 2D). Other widths fail to compile.
template<size_t size, typename EmitFn>
static void EmitFromFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qto = ctx.reg_alloc.WriteQ(inst);
    auto Qfrom = ctx.reg_alloc.ReadQ(args[0]);
    const u8 fbits = args[1].GetImmediateU8();
    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
    const bool fpcr_controlled = args[3].GetImmediateU1();
    ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
    RegAlloc::Realize(Qto, Qfrom);
    // Every other helper in this file that emits FP instructions prepares FPSR
    // tracking first; the conversion can raise Inexact, so do the same here.
    ctx.fpsr.Load();

    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
        if constexpr (size == 32) {
            emit(Qto->S4(), Qfrom->S4(), fbits);
        } else if constexpr (size == 64) {
            emit(Qto->D2(), Qfrom->D2(), fbits);
        } else {
            static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
        }
    });
}
// Vector floating-point to fixed-point/integer conversion.
//
//   fsize     - element width in bits (32 -> 4S, 64 -> 2D; anything else fails to compile)
//   is_signed - selects the signed or unsigned conversion instructions
//
//   args[0] - source vector
//   args[1] - number of fractional bits; only supported when rounding towards zero
//   args[2] - rounding mode
//   args[3] - whether the conversion is FPCR-controlled
template<size_t fsize, bool is_signed>
void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qdest = ctx.reg_alloc.WriteQ(inst);
    auto Qsrc = ctx.reg_alloc.ReadQ(operands[0]);
    const size_t fbits = operands[1].GetImmediateU8();
    const auto rounding = static_cast<FP::RoundingMode>(operands[2].GetImmediateU8());
    const bool fpcr_controlled = inst->GetArg(3).GetU1();
    RegAlloc::Realize(Qdest, Qsrc);
    ctx.fpsr.Load();

    // Pick the arrangement matching the element width once, up front.
    auto Vdest = [&] {
        if constexpr (fsize == 32) {
            return Qdest->S4();
        } else if constexpr (fsize == 64) {
            return Qdest->D2();
        } else {
            static_assert(Common::always_false_v<mcl::mp::lift_value<fsize>>);
        }
    }();
    auto Vsrc = [&] {
        if constexpr (fsize == 32) {
            return Qsrc->S4();
        } else if constexpr (fsize == 64) {
            return Qsrc->D2();
        } else {
            static_assert(Common::always_false_v<mcl::mp::lift_value<fsize>>);
        }
    }();

    const auto emit_conversion = [&] {
        if (rounding == FP::RoundingMode::TowardsZero) {
            // Only the round-towards-zero forms accept a fractional-bit count.
            if constexpr (is_signed) {
                if (fbits) {
                    code.FCVTZS(Vdest, Vsrc, fbits);
                } else {
                    code.FCVTZS(Vdest, Vsrc);
                }
            } else {
                if (fbits) {
                    code.FCVTZU(Vdest, Vsrc, fbits);
                } else {
                    code.FCVTZU(Vdest, Vsrc);
                }
            }
            return;
        }

        ASSERT(fbits == 0);

        if constexpr (is_signed) {
            switch (rounding) {
            case FP::RoundingMode::ToNearest_TieEven:
                code.FCVTNS(Vdest, Vsrc);
                break;
            case FP::RoundingMode::TowardsPlusInfinity:
                code.FCVTPS(Vdest, Vsrc);
                break;
            case FP::RoundingMode::TowardsMinusInfinity:
                code.FCVTMS(Vdest, Vsrc);
                break;
            case FP::RoundingMode::TowardsZero:
                code.FCVTZS(Vdest, Vsrc);
                break;
            case FP::RoundingMode::ToNearest_TieAwayFromZero:
                code.FCVTAS(Vdest, Vsrc);
                break;
            case FP::RoundingMode::ToOdd:
                ASSERT_FALSE("Unimplemented");
                break;
            default:
                ASSERT_FALSE("Invalid RoundingMode");
                break;
            }
        } else {
            switch (rounding) {
            case FP::RoundingMode::ToNearest_TieEven:
                code.FCVTNU(Vdest, Vsrc);
                break;
            case FP::RoundingMode::TowardsPlusInfinity:
                code.FCVTPU(Vdest, Vsrc);
                break;
            case FP::RoundingMode::TowardsMinusInfinity:
                code.FCVTMU(Vdest, Vsrc);
                break;
            case FP::RoundingMode::TowardsZero:
                code.FCVTZU(Vdest, Vsrc);
                break;
            case FP::RoundingMode::ToNearest_TieAwayFromZero:
                code.FCVTAU(Vdest, Vsrc);
                break;
            case FP::RoundingMode::ToOdd:
                ASSERT_FALSE("Unimplemented");
                break;
            default:
                ASSERT_FALSE("Invalid RoundingMode");
                break;
            }
        }
    };
    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, emit_conversion);
}
// Emits a call to a host fallback routine for a one-input vector operation.
// Register allocation must already be done: Qresult and Qarg1 are concrete registers.
//
// `lambda` is converted to a plain function pointer and called with:
//   X0 - address of the result slot (SP + 0)
//   X1 - address of the argument slot (SP + 16)
//   X2 - the FPCR value selected by `fpcr_controlled`
//   X3 - address of the FPSR field inside the JIT state
// After the call the result slot is loaded into Qresult.
template<typename Lambda>
static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) {
const auto fn = static_cast<mcl::equivalent_function_type<Lambda>*>(lambda);
const u32 fpcr = ctx.FPCR(fpcr_controlled).Value();
// Scratch stack area: one 16-byte slot for the result, one for the argument.
constexpr u64 stack_size = sizeof(u64) * 4; // sizeof(u128) * 2
// NOTE(review): the mask clears bit `Qresult.index()`. If the register list keeps
// vector registers at a different bit offset than general-purpose registers, this
// excludes the wrong register and the pop below would overwrite the freshly loaded
// result - confirm against the register-list layout in abi.h.
ABI_PushRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size);
code.MOV(Xscratch0, mcl::bit_cast<u64>(fn));
code.ADD(X0, SP, 0 * 16);
code.ADD(X1, SP, 1 * 16);
code.MOV(X2, fpcr);
code.ADD(X3, Xstate, ctx.conf.state_fpsr_offset);
// Spill the argument into its slot (X1 already points at it).
code.STR(Qarg1, X1);
code.BLR(Xscratch0);
// Fetch the result written by the callee into the slot at SP.
code.LDR(Qresult, SP);
ABI_PopRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size);
}
// Register-allocating front end for EmitTwoOpFallbackWithoutRegAlloc.
// `fpcr_controlled_arg_index` names the instruction argument holding the
// FPCR-controlled flag (argument 1 unless overridden).
template<size_t fpcr_controlled_arg_index = 1, typename Lambda>
static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qsrc = ctx.reg_alloc.ReadQ(operands[0]);
    auto Qdest = ctx.reg_alloc.WriteQ(inst);
    RegAlloc::Realize(Qsrc, Qdest);

    // Spill flag and FPSR state before emitting the host call.
    ctx.reg_alloc.SpillFlags();
    ctx.fpsr.Spill();

    const bool fpcr_controlled = operands[fpcr_controlled_arg_index].GetImmediateU1();
    EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qdest, Qsrc, lambda, fpcr_controlled);
}
// Half-precision vector absolute value, done in place by clearing bit 15
// (the sign bit) of every halfword lane.
template<>
void EmitIR<IR::Opcode::FPVectorAbs16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qvalue = ctx.reg_alloc.ReadWriteQ(operands[0], inst);
    RegAlloc::Realize(Qvalue);

    // 0b10000000 shifted left by 8 is 0x8000.
    code.BIC(Qvalue->H8(), 0b10000000, LSL, 8);
}
// Single-precision vector absolute value: FABS on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorAbs32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn) {
        code.FABS(Vd, Vn);
    };
    EmitTwoOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision vector absolute value: FABS on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorAbs64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn) {
        code.FABS(Vd, Vn);
    };
    EmitTwoOpArranged<64>(code, ctx, inst, emit_op);
}
// Single-precision vector add: FADD on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FADD(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision vector add: FADD on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FADD(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Single-precision vector divide: FDIV on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorDiv32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FDIV(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision vector divide: FDIV on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorDiv64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FDIV(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorEqual16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Single-precision lane-wise equality compare: FCMEQ on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorEqual32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FCMEQ(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision lane-wise equality compare: FCMEQ on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorEqual64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FCMEQ(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Widen four half-precision lanes (read from a D register) to single precision.
// Only round-to-nearest-even is accepted as the rounding mode (args[1]);
// args[2] selects the FPCR.
template<>
void EmitIR<IR::Opcode::FPVectorFromHalf32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto rounding_mode = static_cast<FP::RoundingMode>(operands[1].GetImmediateU8());
    ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
    const bool fpcr_controlled = operands[2].GetImmediateU1();

    auto Qwide = ctx.reg_alloc.WriteQ(inst);
    auto Dnarrow = ctx.reg_alloc.ReadD(operands[0]);
    RegAlloc::Realize(Qwide, Dnarrow);
    ctx.fpsr.Load();

    const auto emit_widen = [&] {
        code.FCVTL(Qwide->S4(), Dnarrow->H4());
    };
    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, emit_widen);
}
// Signed 32-bit fixed-point/integer lanes to single precision via SCVTF.
// The fractional-bit form is used only when fbits is non-zero.
template<>
void EmitIR<IR::Opcode::FPVectorFromSignedFixed32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_convert = [&](auto Vto, auto Vfrom, u8 fbits) {
        if (fbits) {
            code.SCVTF(Vto, Vfrom, fbits);
        } else {
            code.SCVTF(Vto, Vfrom);
        }
    };
    EmitFromFixed<32>(code, ctx, inst, emit_convert);
}
// Signed 64-bit fixed-point/integer lanes to double precision via SCVTF.
// The fractional-bit form is used only when fbits is non-zero.
template<>
void EmitIR<IR::Opcode::FPVectorFromSignedFixed64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_convert = [&](auto Vto, auto Vfrom, u8 fbits) {
        if (fbits) {
            code.SCVTF(Vto, Vfrom, fbits);
        } else {
            code.SCVTF(Vto, Vfrom);
        }
    };
    EmitFromFixed<64>(code, ctx, inst, emit_convert);
}
// Unsigned 32-bit fixed-point/integer lanes to single precision via UCVTF.
// The fractional-bit form is used only when fbits is non-zero.
template<>
void EmitIR<IR::Opcode::FPVectorFromUnsignedFixed32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_convert = [&](auto Vto, auto Vfrom, u8 fbits) {
        if (fbits) {
            code.UCVTF(Vto, Vfrom, fbits);
        } else {
            code.UCVTF(Vto, Vfrom);
        }
    };
    EmitFromFixed<32>(code, ctx, inst, emit_convert);
}
// Unsigned 64-bit fixed-point/integer lanes to double precision via UCVTF.
// The fractional-bit form is used only when fbits is non-zero.
template<>
void EmitIR<IR::Opcode::FPVectorFromUnsignedFixed64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_convert = [&](auto Vto, auto Vfrom, u8 fbits) {
        if (fbits) {
            code.UCVTF(Vto, Vfrom, fbits);
        } else {
            code.UCVTF(Vto, Vfrom);
        }
    };
    EmitFromFixed<64>(code, ctx, inst, emit_convert);
}
// Single-precision lane-wise greater-than compare: FCMGT on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorGreater32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FCMGT(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision lane-wise greater-than compare: FCMGT on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorGreater64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FCMGT(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Single-precision lane-wise greater-or-equal compare: FCMGE on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorGreaterEqual32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FCMGE(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision lane-wise greater-or-equal compare: FCMGE on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorGreaterEqual64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FCMGE(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Single-precision lane-wise maximum: FMAX on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMax32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMAX(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision lane-wise maximum: FMAX on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMax64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMAX(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Single-precision lane-wise maximum-number: FMAXNM on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMaxNumeric32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMAXNM(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision lane-wise maximum-number: FMAXNM on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMaxNumeric64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMAXNM(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Single-precision lane-wise minimum: FMIN on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMin32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMIN(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision lane-wise minimum: FMIN on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMin64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMIN(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Single-precision lane-wise minimum-number: FMINNM on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMinNumeric32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMINNM(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision lane-wise minimum-number: FMINNM on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMinNumeric64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMINNM(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Single-precision vector multiply: FMUL on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMul32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMUL(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<32>(code, ctx, inst, emit_op);
}
// Double-precision vector multiply: FMUL on the 2D arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMul64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vd, auto Vn, auto Vm) {
        code.FMUL(Vd, Vn, Vm);
    };
    EmitThreeOpArranged<64>(code, ctx, inst, emit_op);
}
// Not implemented by this backend; reaching it trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorMulAdd16>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {
    ASSERT_FALSE("Unimplemented");
}
// Single-precision vector fused multiply-accumulate: FMLA on the 4S arrangement.
template<>
void EmitIR<IR::Opcode::FPVectorMulAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto emit_op = [&](auto Vacc, auto Vn, auto Vm) {
        code.FMLA(Vacc, Vn, Vm);
    };
    EmitFMA<32>(code, ctx, inst, emit_op);
}
// Lowers FPVectorMulAdd64: emits FMLA through the 64-bit EmitFMA helper.
template<>
void EmitIR<IR::Opcode::FPVectorMulAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitFMA<64>(code, ctx, inst, [&code](auto Vacc, auto Vlhs, auto Vrhs) {
        code.FMLA(Vacc, Vlhs, Vrhs);
    });
}
// Lowers FPVectorMulX32: emits FMULX through the 32-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorMulX32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FMULX(Vd, Vn, Vm);
    });
}
// Lowers FPVectorMulX64: emits FMULX through the 64-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorMulX64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FMULX(Vd, Vn, Vm);
    });
}
// FPVectorNeg16 is not implemented by this backend: the parameters are
// explicitly discarded and reaching this emitter trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorNeg16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// Lowers FPVectorNeg32: emits FNEG through the 32-bit EmitTwoOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorNeg32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn) {
        code.FNEG(Vd, Vn);
    });
}
// Lowers FPVectorNeg64: emits FNEG through the 64-bit EmitTwoOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorNeg64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn) {
        code.FNEG(Vd, Vn);
    });
}
// Lowers FPVectorPairedAdd32: emits FADDP through the 32-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorPairedAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FADDP(Vd, Vn, Vm);
    });
}
// Lowers FPVectorPairedAdd64: emits FADDP through the 64-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorPairedAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FADDP(Vd, Vn, Vm);
    });
}
// Lowers FPVectorPairedAddLower32 with a three-instruction sequence that uses
// V0 and V1 as temporaries.
template<>
void EmitIR<IR::Opcode::FPVectorPairedAddLower32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOp(code, ctx, inst, [&code](auto& Qd, auto& Qn, auto& Qm) {
        // V0 <- 64-bit element 0 of each operand, interleaved.
        code.ZIP1(V0.D2(), Qn->D2(), Qm->D2());
        // D1 <- 0, giving the pairwise add a zero second source.
        code.MOVI(D1, oaknut::RepImm{0});
        // Pairwise single-precision add of V0 and V1 into the result.
        code.FADDP(Qd->S4(), V0.S4(), V1.S4());
    });
}
// Lowers FPVectorPairedAddLower64 with a two-instruction sequence that uses
// V0 as a temporary.
template<>
void EmitIR<IR::Opcode::FPVectorPairedAddLower64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOp(code, ctx, inst, [&code](auto& Qd, auto& Qn, auto& Qm) {
        // V0 <- 64-bit element 0 of each operand, interleaved.
        code.ZIP1(V0.D2(), Qn->D2(), Qm->D2());
        // Scalar pairwise add of the two doubles in V0 into the D view of the result.
        code.FADDP(Qd->toD(), V0.D2());
    });
}
// FPVectorRecipEstimate16 is not implemented by this backend: the parameters
// are explicitly discarded and reaching this emitter trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorRecipEstimate16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// Lowers FPVectorRecipEstimate32: emits FRECPE through the 32-bit EmitTwoOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorRecipEstimate32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn) {
        code.FRECPE(Vd, Vn);
    });
}
// Lowers FPVectorRecipEstimate64: emits FRECPE through the 64-bit EmitTwoOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorRecipEstimate64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn) {
        code.FRECPE(Vd, Vn);
    });
}
// FPVectorRecipStepFused16 is not implemented by this backend: the parameters
// are explicitly discarded and reaching this emitter trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorRecipStepFused16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// Lowers FPVectorRecipStepFused32: emits FRECPS through the 32-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorRecipStepFused32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FRECPS(Vd, Vn, Vm);
    });
}
// Lowers FPVectorRecipStepFused64: emits FRECPS through the 64-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorRecipStepFused64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FRECPS(Vd, Vn, Vm);
    });
}
// Lowers FPVectorRoundInt16 through a host-side fallback rather than inline
// instructions. The instruction's argument 1 (rounding mode) and argument 2
// (exact flag) select one pre-instantiated routine from a lookup table; that
// routine applies FP::FPRoundInt to every element of the input vector.
template<>
void EmitIR<IR::Opcode::FPVectorRoundInt16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
const bool exact = inst->GetArg(2).GetU1();
// Compile-time lists of every rounding mode and exact value the table covers.
using rounding_list = mp::list<
mp::lift_value<FP::RoundingMode::ToNearest_TieEven>,
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
mp::lift_value<FP::RoundingMode::TowardsZero>,
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
using exact_list = mp::list<std::true_type, std::false_type>;
// Function-local static: the table is built once, with one entry per
// element of the cartesian product (rounding mode x exact).
static const auto lut = Common::GenerateLookupTableFromList(
[]<typename I>(I) {
using FPT = u16;
// Each entry pairs the runtime key tuple with a function pointer
// specialised on the compile-time (rounding mode, exact) pair I.
return std::pair{
mp::lower_to_tuple_v<I>,
Common::FptrCast(
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value;
constexpr bool exact = mp::get<1, I>::value;
for (size_t i = 0; i < output.size(); ++i) {
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
}
})};
},
mp::cartesian_product<rounding_list, exact_list>{});
// Look up the routine for this instruction's runtime (rounding, exact) pair
// and hand it to the two-operand fallback emitter.
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
}
// Lowers FPVectorRoundInt32 to one FRINT* instruction on the S4 arrangement.
// Instruction arguments: 0 = operand, 1 = rounding mode (U8), 2 = exact (U1),
// 3 = fpcr_controlled (U1).
template<>
void EmitIR<IR::Opcode::FPVectorRoundInt32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
    const bool exact = inst->GetArg(2).GetU1();
    const bool fpcr_controlled = inst->GetArg(3).GetU1();

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qresult = ctx.reg_alloc.WriteQ(inst);
    auto Qoperand = ctx.reg_alloc.ReadQ(args[0]);
    RegAlloc::Realize(Qresult, Qoperand);
    ctx.fpsr.Load();

    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
        if (exact) {
            // FRINTX rounds using the current FPCR mode, so that mode must
            // already match the requested one.
            ASSERT(ctx.FPCR(fpcr_controlled).RMode() == rounding_mode);
            code.FRINTX(Qresult->S4(), Qoperand->S4());
            return;
        }

        // Non-exact rounding: pick the instruction that encodes the mode directly.
        switch (rounding_mode) {
        case FP::RoundingMode::ToNearest_TieEven:
            code.FRINTN(Qresult->S4(), Qoperand->S4());
            break;
        case FP::RoundingMode::TowardsPlusInfinity:
            code.FRINTP(Qresult->S4(), Qoperand->S4());
            break;
        case FP::RoundingMode::TowardsMinusInfinity:
            code.FRINTM(Qresult->S4(), Qoperand->S4());
            break;
        case FP::RoundingMode::TowardsZero:
            code.FRINTZ(Qresult->S4(), Qoperand->S4());
            break;
        case FP::RoundingMode::ToNearest_TieAwayFromZero:
            code.FRINTA(Qresult->S4(), Qoperand->S4());
            break;
        default:
            ASSERT_FALSE("Invalid RoundingMode");
        }
    });
}
// Lowers FPVectorRoundInt64 to one FRINT* instruction on the D2 arrangement.
// Instruction arguments: 0 = operand, 1 = rounding mode (U8), 2 = exact (U1),
// 3 = fpcr_controlled (U1).
template<>
void EmitIR<IR::Opcode::FPVectorRoundInt64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
    const bool exact = inst->GetArg(2).GetU1();
    const bool fpcr_controlled = inst->GetArg(3).GetU1();

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qresult = ctx.reg_alloc.WriteQ(inst);
    auto Qoperand = ctx.reg_alloc.ReadQ(args[0]);
    RegAlloc::Realize(Qresult, Qoperand);
    ctx.fpsr.Load();

    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
        if (exact) {
            // FRINTX rounds using the current FPCR mode, so that mode must
            // already match the requested one.
            ASSERT(ctx.FPCR(fpcr_controlled).RMode() == rounding_mode);
            code.FRINTX(Qresult->D2(), Qoperand->D2());
            return;
        }

        // Non-exact rounding: pick the instruction that encodes the mode directly.
        switch (rounding_mode) {
        case FP::RoundingMode::ToNearest_TieEven:
            code.FRINTN(Qresult->D2(), Qoperand->D2());
            break;
        case FP::RoundingMode::TowardsPlusInfinity:
            code.FRINTP(Qresult->D2(), Qoperand->D2());
            break;
        case FP::RoundingMode::TowardsMinusInfinity:
            code.FRINTM(Qresult->D2(), Qoperand->D2());
            break;
        case FP::RoundingMode::TowardsZero:
            code.FRINTZ(Qresult->D2(), Qoperand->D2());
            break;
        case FP::RoundingMode::ToNearest_TieAwayFromZero:
            code.FRINTA(Qresult->D2(), Qoperand->D2());
            break;
        default:
            ASSERT_FALSE("Invalid RoundingMode");
        }
    });
}
// FPVectorRSqrtEstimate16 is not implemented by this backend: the parameters
// are explicitly discarded and reaching this emitter trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// Lowers FPVectorRSqrtEstimate32: emits FRSQRTE through the 32-bit EmitTwoOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn) {
        code.FRSQRTE(Vd, Vn);
    });
}
// Lowers FPVectorRSqrtEstimate64: emits FRSQRTE through the 64-bit EmitTwoOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn) {
        code.FRSQRTE(Vd, Vn);
    });
}
// FPVectorRSqrtStepFused16 is not implemented by this backend: the parameters
// are explicitly discarded and reaching this emitter trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// Lowers FPVectorRSqrtStepFused32: emits FRSQRTS through the 32-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FRSQRTS(Vd, Vn, Vm);
    });
}
// Lowers FPVectorRSqrtStepFused64: emits FRSQRTS through the 64-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FRSQRTS(Vd, Vn, Vm);
    });
}
// Lowers FPVectorSqrt32: emits FSQRT through the 32-bit EmitTwoOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorSqrt32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn) {
        code.FSQRT(Vd, Vn);
    });
}
// Lowers FPVectorSqrt64: emits FSQRT through the 64-bit EmitTwoOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorSqrt64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitTwoOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn) {
        code.FSQRT(Vd, Vn);
    });
}
// Lowers FPVectorSub32: emits FSUB through the 32-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FSUB(Vd, Vn, Vm);
    });
}
// Lowers FPVectorSub64: emits FSUB through the 64-bit EmitThreeOpArranged helper.
template<>
void EmitIR<IR::Opcode::FPVectorSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    EmitThreeOpArranged<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.FSUB(Vd, Vn, Vm);
    });
}
// Lowers FPVectorToHalf32: narrows the S4 operand to H4 with FCVTN.
// Arguments: 0 = operand, 1 = rounding mode (only ToNearest_TieEven is
// accepted), 2 = fpcr_controlled.
template<>
void EmitIR<IR::Opcode::FPVectorToHalf32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const auto rounding_mode = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
    ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
    const bool fpcr_controlled = args[2].GetImmediateU1();

    // The result is allocated as a D register and written through its H4 view.
    auto Dresult = ctx.reg_alloc.WriteD(inst);
    auto Qoperand = ctx.reg_alloc.ReadQ(args[0]);
    RegAlloc::Realize(Dresult, Qoperand);
    ctx.fpsr.Load();

    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
        code.FCVTN(Dresult->H4(), Qoperand->S4());
    });
}
// FPVectorToSignedFixed16 is not implemented by this backend: the parameters
// are explicitly discarded and reaching this emitter trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorToSignedFixed16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// Lowers FPVectorToSignedFixed32 by delegating to EmitToFixed<32, true>.
template<>
void EmitIR<IR::Opcode::FPVectorToSignedFixed32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t fsize = 32;
    constexpr bool is_signed = true;
    EmitToFixed<fsize, is_signed>(code, ctx, inst);
}
// Lowers FPVectorToSignedFixed64 by delegating to EmitToFixed<64, true>.
template<>
void EmitIR<IR::Opcode::FPVectorToSignedFixed64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t fsize = 64;
    constexpr bool is_signed = true;
    EmitToFixed<fsize, is_signed>(code, ctx, inst);
}
// FPVectorToUnsignedFixed16 is not implemented by this backend: the parameters
// are explicitly discarded and reaching this emitter trips an assertion.
template<>
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    static_cast<void>(code);
    static_cast<void>(ctx);
    static_cast<void>(inst);
    ASSERT_FALSE("Unimplemented");
}
// Lowers FPVectorToUnsignedFixed32 by delegating to EmitToFixed<32, false>.
template<>
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t fsize = 32;
    constexpr bool is_signed = false;
    EmitToFixed<fsize, is_signed>(code, ctx, inst);
}
// Lowers FPVectorToUnsignedFixed64 by delegating to EmitToFixed<64, false>.
template<>
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t fsize = 64;
    constexpr bool is_signed = false;
    EmitToFixed<fsize, is_signed>(code, ctx, inst);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,126 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/mp/metavalue/lift_value.hpp>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/common/always_false.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Shared driver for the saturating vector emitters in this file.
//
// Allocates the result and both source Q registers, loads the FPSR manager,
// then invokes `emit` with the three registers viewed in the arrangement for
// `size`-bit lanes: 8 -> B16, 16 -> H8, 32 -> S4, 64 -> D2. Any other size is
// rejected at compile time. The CodeGenerator parameter is unused here; the
// callback is expected to carry its own reference to it.
template<size_t size, typename EmitFn>
static void Emit(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto Qresult = ctx.reg_alloc.WriteQ(inst);
    auto Qa = ctx.reg_alloc.ReadQ(args[0]);
    auto Qb = ctx.reg_alloc.ReadQ(args[1]);
    RegAlloc::Realize(Qresult, Qa, Qb);

    // NOTE(review): presumably needed so the saturation flag set by the
    // emitted instruction is tracked by the FPSR manager; confirm.
    ctx.fpsr.Load();

    if constexpr (size == 8) {
        emit(Qresult->B16(), Qa->B16(), Qb->B16());
    } else if constexpr (size == 16) {
        emit(Qresult->H8(), Qa->H8(), Qb->H8());
    } else if constexpr (size == 32) {
        emit(Qresult->S4(), Qa->S4(), Qb->S4());
    } else if constexpr (size == 64) {
        emit(Qresult->D2(), Qa->D2(), Qb->D2());
    } else {
        static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
    }
}
// VectorSignedSaturatedAdd8: SQADD on the B16 arrangement.
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<8>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.SQADD(Vd, Vn, Vm);
    });
}
// VectorSignedSaturatedAdd16: SQADD on the H8 arrangement.
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<16>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.SQADD(Vd, Vn, Vm);
    });
}
// VectorSignedSaturatedAdd32: SQADD on the S4 arrangement.
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.SQADD(Vd, Vn, Vm);
    });
}
// VectorSignedSaturatedAdd64: SQADD on the D2 arrangement.
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.SQADD(Vd, Vn, Vm);
    });
}
// VectorSignedSaturatedSub8: SQSUB on the B16 arrangement.
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedSub8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<8>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.SQSUB(Vd, Vn, Vm);
    });
}
// VectorSignedSaturatedSub16: SQSUB on the H8 arrangement.
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedSub16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<16>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.SQSUB(Vd, Vn, Vm);
    });
}
// VectorSignedSaturatedSub32: SQSUB on the S4 arrangement.
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.SQSUB(Vd, Vn, Vm);
    });
}
// VectorSignedSaturatedSub64: SQSUB on the D2 arrangement.
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.SQSUB(Vd, Vn, Vm);
    });
}
// VectorUnsignedSaturatedAdd8: UQADD on the B16 arrangement.
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<8>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.UQADD(Vd, Vn, Vm);
    });
}
// VectorUnsignedSaturatedAdd16: UQADD on the H8 arrangement.
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<16>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.UQADD(Vd, Vn, Vm);
    });
}
// VectorUnsignedSaturatedAdd32: UQADD on the S4 arrangement.
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.UQADD(Vd, Vn, Vm);
    });
}
// VectorUnsignedSaturatedAdd64: UQADD on the D2 arrangement.
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.UQADD(Vd, Vn, Vm);
    });
}
// VectorUnsignedSaturatedSub8: UQSUB on the B16 arrangement.
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<8>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.UQSUB(Vd, Vn, Vm);
    });
}
// VectorUnsignedSaturatedSub16: UQSUB on the H8 arrangement.
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<16>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.UQSUB(Vd, Vn, Vm);
    });
}
// VectorUnsignedSaturatedSub32: UQSUB on the S4 arrangement.
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<32>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.UQSUB(Vd, Vn, Vm);
    });
}
// VectorUnsignedSaturatedSub64: UQSUB on the D2 arrangement.
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
    Emit<64>(code, ctx, inst, [&code](auto Vd, auto Vn, auto Vm) {
        code.UQSUB(Vd, Vn, Vm);
    });
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,51 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <functional>
#include <memory>
#include <vector>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/reg_alloc.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/ir/basic_block.h"
namespace Dynarmic::IR {
class Block;
} // namespace Dynarmic::IR
namespace Dynarmic::Backend::Arm64 {
struct EmitConfig;
class FastmemManager;
class FpsrManager;
using SharedLabel = std::shared_ptr<oaknut::Label>;
// Creates a fresh, default-constructed oaknut::Label owned by a shared_ptr.
inline SharedLabel GenSharedLabel() {
    SharedLabel label = std::make_shared<oaknut::Label>();
    return label;
}
// Bundle of references handed to every emitter while one IR block is lowered.
struct EmitContext {
    IR::Block& block;
    RegAlloc& reg_alloc;
    const EmitConfig& conf;
    EmittedBlockInfo& ebi;
    FpsrManager& fpsr;
    FastmemManager& fastmem;

    // Callbacks queued by emitters; they are not invoked anywhere in this header.
    std::vector<std::function<void()>> deferred_emits;

    // Returns the FPCR derived from this block's location descriptor. When
    // `fpcr_controlled` is false, the ASIMD standard value of that FPCR is
    // returned instead.
    FP::FPCR FPCR(bool fpcr_controlled = true) const {
        const FP::FPCR block_fpcr = conf.descriptor_to_fpcr(block.Location());
        if (fpcr_controlled) {
            return block_fpcr;
        }
        return block_fpcr.ASIMDStandardValue();
    }
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,61 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/interface/exclusive_monitor.h"
#include <algorithm>
#include "dynarmic/common/assert.h"
namespace Dynarmic {
// Sizes both per-processor tables to `processor_count`; every address slot
// starts out as INVALID_EXCLUSIVE_ADDRESS.
ExclusiveMonitor::ExclusiveMonitor(size_t processor_count)
        : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS)
        , exclusive_values(processor_count) {
}
// The processor count is the number of slots in the address table.
size_t ExclusiveMonitor::GetProcessorCount() const {
    const size_t slot_count = exclusive_addresses.size();
    return slot_count;
}
void ExclusiveMonitor::Lock() {
lock.Lock();
}
void ExclusiveMonitor::Unlock() {
lock.Unlock();
}
// Checks whether `processor_id` holds an exclusive reservation covering
// `address` and, if so, invalidates every reservation on that granule.
//
// Returns false, with the lock released, when the processor's reserved
// address does not match. Returns true when it matches.
//
// NOTE: the success path returns WITHOUT calling Unlock(), so the monitor
// lock is still held when this function returns true. Presumably the caller
// performs the exclusive store and then calls Unlock() itself; verify against
// the callers before changing this.
bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
// Compare at reservation-granule granularity, not exact address.
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
Lock();
if (exclusive_addresses[processor_id] != masked_address) {
Unlock();
return false;
}
// Clear every slot reserved on this granule, including the caller's own.
for (VAddr& other_address : exclusive_addresses) {
if (other_address == masked_address) {
other_address = INVALID_EXCLUSIVE_ADDRESS;
}
}
// Lock intentionally left held here (see the note above).
return true;
}
void ExclusiveMonitor::Clear() {
Lock();
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
Unlock();
}
void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
Lock();
exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
Unlock();
}
} // namespace Dynarmic

View file

@ -0,0 +1,59 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstddef>
#include <tuple>
#include <mcl/hash/xmrx.hpp>
#include "dynarmic/common/common_types.h"
#include <ankerl/unordered_dense.h>
#include "dynarmic/backend/exception_handler.h"
#include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::Backend::Arm64 {
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, unsigned>;
struct DoNotFastmemMarkerHash {
size_t operator()(const DoNotFastmemMarker& value) const {
return mcl::hash::xmrx(std::get<0>(value).Value() ^ static_cast<u64>(std::get<1>(value)));
}
};
// Record describing one fastmem access site.
// NOTE(review): field meanings below are inferred from names and types only;
// confirm against the code that fills in and consumes this struct.
struct FastmemPatchInfo {
// (location descriptor, unsigned) pair identifying the access; the same type
// is the key of FastmemManager's do-not-fastmem set.
DoNotFastmemMarker marker;
// Presumably the call to redirect to when the access faults. TODO confirm.
FakeCall fc;
// Presumably whether the owning block should be recompiled afterwards. TODO confirm.
bool recompile;
};
// Tracks which access sites must not use fastmem and forwards the host
// capability query to the exception handler.
class FastmemManager {
public:
    explicit FastmemManager(ExceptionHandler& eh)
            : exception_handler(eh) {}

    // True when the underlying exception handler reports fastmem support.
    bool SupportsFastmem() const {
        return exception_handler.SupportsFastmem();
    }

    // True unless `marker` was previously passed to MarkDoNotFastmem.
    bool ShouldFastmem(DoNotFastmemMarker marker) const {
        return do_not_fastmem.find(marker) == do_not_fastmem.end();
    }

    // Records `marker` so later ShouldFastmem queries for it return false.
    void MarkDoNotFastmem(DoNotFastmemMarker marker) {
        do_not_fastmem.insert(marker);
    }

private:
    ExceptionHandler& exception_handler;
    ankerl::unordered_dense::set<DoNotFastmemMarker, DoNotFastmemMarkerHash> do_not_fastmem;
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,49 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/abi.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
// Binds the manager to a code generator and records the byte offset (relative
// to Xstate) at which the guest FPSR is stored.
FpsrManager::FpsrManager(oaknut::CodeGenerator& code, size_t state_fpsr_offset)
        : code{code}
        , state_fpsr_offset{state_fpsr_offset} {
}
// If the host FPSR is currently tracked as loaded, emits code that ORs it
// into the FPSR word stored in the JIT state, then marks it as not loaded.
// Does nothing otherwise. Uses scratch registers 0 and 1.
void FpsrManager::Spill() {
    if (fpsr_loaded) {
        code.LDR(Wscratch0, Xstate, state_fpsr_offset);
        code.MRS(Xscratch1, oaknut::SystemReg::FPSR);
        code.ORR(Wscratch0, Wscratch0, Wscratch1);
        code.STR(Wscratch0, Xstate, state_fpsr_offset);
        fpsr_loaded = false;
    }
}
// Ensures the host FPSR is tracked as loaded. On the first call after a
// spill or overwrite this emits a write of zero (XZR) to FPSR; later calls
// emit nothing.
void FpsrManager::Load() {
    if (!fpsr_loaded) {
        code.MSR(oaknut::SystemReg::FPSR, XZR);
        fpsr_loaded = true;
    }
}
// Emits code that leaves the current FPSR in `dest`: the word stored in the
// JIT state, ORed with the live host FPSR when that is tracked as loaded.
// Scratch register 1 is used for the live read.
void FpsrManager::GetFpsr(oaknut::WReg dest) {
    code.LDR(dest, Xstate, state_fpsr_offset);
    if (!fpsr_loaded) {
        return;
    }
    code.MRS(Xscratch1, oaknut::SystemReg::FPSR);
    code.ORR(dest, dest, Wscratch1);
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,36 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/common/common_types.h"
namespace oaknut {
struct CodeGenerator;
struct WReg;
} // namespace oaknut
namespace Dynarmic::Backend::Arm64 {
// Tracks whether the host FPSR register currently holds live flag bits that
// have not yet been merged into the FPSR word stored in the JIT state.
class FpsrManager {
public:
// `state_fpsr_offset` is the offset of the stored FPSR word used by the
// emitted loads and stores.
explicit FpsrManager(oaknut::CodeGenerator& code, size_t state_fpsr_offset);
// Emits code merging the host FPSR into the stored word, if loaded.
void Spill();
// Emits code zeroing the host FPSR, if not already loaded.
void Load();
// Marks the host FPSR as not loaded without emitting any code.
void Overwrite() { fpsr_loaded = false; }
// Emits code placing the combined FPSR value in the given register.
void GetFpsr(oaknut::WReg);
private:
oaknut::CodeGenerator& code;
size_t state_fpsr_offset;
bool fpsr_loaded = false;
};
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,616 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/reg_alloc.h"
#include <algorithm>
#include <array>
#include <iterator>
#include "dynarmic/common/assert.h"
#include <mcl/bit/bit_field.hpp>
#include <mcl/bit_cast.hpp>
#include <mcl/mp/metavalue/lift_value.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/backend/arm64/fpsr_manager.h"
#include "dynarmic/backend/arm64/verbose_debugging_output.h"
#include "dynarmic/common/always_false.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
constexpr size_t spill_offset = offsetof(StackLayout, spill);
constexpr size_t spill_slot_size = sizeof(decltype(StackLayout::spill)::value_type);
// Only IR::Type::Table is treated as a type that carries no value.
static bool IsValuelessType(IR::Type type) {
    return type == IR::Type::Table;
}
// Forwards to the wrapped IR value's type.
IR::Type Argument::GetType() const {
    const IR::Type type = value.GetType();
    return type;
}
// Forwards to the wrapped IR value's immediate check.
bool Argument::IsImmediate() const {
    const bool is_immediate = value.IsImmediate();
    return is_immediate;
}
// Returns the wrapped value read as a U1.
bool Argument::GetImmediateU1() const {
    const bool bit = value.GetU1();
    return bit;
}
// Returns the immediate narrowed to 8 bits; asserts that it fits.
u8 Argument::GetImmediateU8() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x100);
    return static_cast<u8>(imm);
}
// Returns the immediate narrowed to 16 bits; asserts that it fits.
u16 Argument::GetImmediateU16() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x10000);
    return static_cast<u16>(imm);
}
// Returns the immediate narrowed to 32 bits; asserts that it fits.
u32 Argument::GetImmediateU32() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x100000000);
    return static_cast<u32>(imm);
}
// Returns the immediate as a full 64-bit value (no range check).
u64 Argument::GetImmediateU64() const {
    const u64 imm = value.GetImmediateAsU64();
    return imm;
}
// Returns the immediate as an IR::Cond; asserts the argument is an immediate
// of type Cond.
IR::Cond Argument::GetImmediateCond() const {
    ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
    const IR::Cond cond = value.GetCond();
    return cond;
}
// Returns the immediate as an IR::AccType; asserts the argument is an
// immediate of type AccType.
IR::AccType Argument::GetImmediateAccType() const {
    ASSERT(IsImmediate() && GetType() == IR::Type::AccType);
    const IR::AccType acc_type = value.GetAccType();
    return acc_type;
}
// Asks the register allocator where this argument's defining instruction
// currently lives and returns the kind of that location. The lookup result
// is dereferenced without a check.
HostLoc::Kind Argument::CurrentLocationKind() const {
    const auto location = reg_alloc.ValueLocation(value.GetInst());
    return location->kind;
}
// True when `value` is one of the instructions held by this location.
bool HostLocInfo::Contains(const IR::Inst* value) const {
    for (const IR::Inst* held : values) {
        if (held == value) {
            return true;
        }
    }
    return false;
}
void HostLocInfo::SetupScratchLocation() {
ASSERT(IsCompletelyEmpty());
realized = true;
}
// Claims an empty location for `value`: it becomes the only held value, the
// use counters are reset, and the expected use count is taken from the
// instruction.
void HostLocInfo::SetupLocation(const IR::Inst* value) {
    ASSERT(IsCompletelyEmpty());

    values.clear();
    values.push_back(value);

    uses_this_inst = 0;
    accumulated_uses = 0;
    expected_uses = value->UseCount();

    realized = true;
}
// A location is completely empty when it holds no values, is neither locked
// nor realized, and all three use counters are zero.
bool HostLocInfo::IsCompletelyEmpty() const {
    if (!values.empty()) {
        return false;
    }
    if (locked || realized) {
        return false;
    }
    return !(accumulated_uses || expected_uses || uses_this_inst);
}
// A location may be allocated only while it is neither locked nor realized.
bool HostLocInfo::MaybeAllocatable() const {
    return !(locked || realized);
}
// True when the current instruction uses this location exactly once and that
// use is the last expected one.
bool HostLocInfo::IsOneRemainingUse() const {
    if (accumulated_uses + 1 != expected_uses) {
        return false;
    }
    return uses_this_inst == 1;
}
// Folds the current instruction's uses into the running total. Once every
// expected use has been seen, the held values and the counters are cleared.
void HostLocInfo::UpdateUses() {
    accumulated_uses += uses_this_inst;
    uses_this_inst = 0;

    if (accumulated_uses != expected_uses) {
        return;
    }

    values.clear();
    accumulated_uses = 0;
    expected_uses = 0;
}
// Builds the Argument wrappers for `inst`. Each non-immediate argument with
// a real value must already have a location, and that location's per-
// instruction use counter is incremented. The returned array has four slots.
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
    ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};

    for (size_t index = 0; index < inst->NumArgs(); ++index) {
        const IR::Value arg = inst->GetArg(index);
        ret[index].value = arg;

        // Immediates and valueless types occupy no host location.
        if (arg.IsImmediate() || IsValuelessType(arg.GetType())) {
            continue;
        }

        ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
        ValueInfo(arg.GetInst()).uses_this_inst++;
    }

    return ret;
}
// True when `inst` has been recorded in the set of defined instructions.
bool RegAlloc::WasValueDefined(IR::Inst* inst) const {
    return defined_insts.count(inst) != 0;
}
// Prepares host state for a call out of JIT code.
//
// Spills the FPSR manager's state, the flags, and every register named in
// ABI_CALLER_SAVE (low 32 bits index GPRs, high 32 bits index FPRs). Then
// copies up to four optional arguments into argument registers: U128-typed
// arguments go to Q registers numbered by `nsrn`, everything else to X
// registers numbered by `ngrn`. An absent argument still consumes one
// general-purpose register number.
void RegAlloc::PrepareForCall(std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
fpsr_manager.Spill();
SpillFlags();
// TODO: Spill into callee-save registers
// Low half of the mask: caller-saved general-purpose registers.
for (int i = 0; i < 32; i++) {
if (mcl::bit::get_bit(i, static_cast<u32>(ABI_CALLER_SAVE))) {
SpillGpr(i);
}
}
// High half of the mask: caller-saved vector registers.
for (int i = 0; i < 32; i++) {
if (mcl::bit::get_bit(i, static_cast<u32>(ABI_CALLER_SAVE >> 32))) {
SpillFpr(i);
}
}
const std::array<std::optional<Argument::copyable_reference>, 4> args{arg0, arg1, arg2, arg3};
// AAPCS64 Next General-purpose Register Number
int ngrn = 0;
// AAPCS64 Next SIMD and Floating-point Register Number
int nsrn = 0;
for (int i = 0; i < 4; i++) {
if (args[i]) {
if (args[i]->get().GetType() == IR::Type::U128) {
// The target register must be free after the spills above.
ASSERT(fprs[nsrn].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::QReg{nsrn});
nsrn++;
} else {
ASSERT(gprs[ngrn].IsCompletelyEmpty());
LoadCopyInto(args[i]->get().value, oaknut::XReg{ngrn});
ngrn++;
}
} else {
// Gaps are assumed to be in general-purpose registers
// TODO: should there be a separate list passed for FPRs instead?
ngrn++;
}
}
}
// Defines `inst` as an alias of an existing argument. For an immediate
// argument, every use of `inst` is replaced by that immediate. Otherwise
// `inst` is added to the argument's current host location, whose expected
// use count grows by `inst`'s use count.
void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
    defined_insts.insert(inst);
    ASSERT(!ValueLocation(inst));

    if (!arg.value.IsImmediate()) {
        auto& host = ValueInfo(arg.value.GetInst());
        host.values.push_back(inst);
        host.expected_uses += inst->UseCount();
        return;
    }

    inst->ReplaceUsesWith(arg.value);
}
// Defines `inst` as living in a specific host register. Vector registers are
// tracked in `fprs`, all others in `gprs`; the chosen slot must be empty.
void RegAlloc::DefineAsRegister(IR::Inst* inst, oaknut::Reg reg) {
    defined_insts.insert(inst);
    ASSERT(!ValueLocation(inst));

    HostLocInfo& info = reg.is_vector() ? fprs[reg.index()] : gprs[reg.index()];
    ASSERT(info.IsCompletelyEmpty());

    info.values.push_back(inst);
    info.expected_uses += inst->UseCount();
}
void RegAlloc::UpdateAllUses() {
for (auto& gpr : gprs) {
gpr.UpdateUses();
}
for (auto& fpr : fprs) {
fpr.UpdateUses();
}
flags.UpdateUses();
for (auto& spill : spills) {
spill.UpdateUses();
}
}
// Sanity check: no host location may be locked or realized.
void RegAlloc::AssertAllUnlocked() const {
    const auto is_unlocked = [](const auto& location) {
        return !(location.locked || location.realized);
    };

    ASSERT(std::all_of(gprs.begin(), gprs.end(), is_unlocked));
    ASSERT(std::all_of(fprs.begin(), fprs.end(), is_unlocked));
    ASSERT(is_unlocked(flags));
    ASSERT(std::all_of(spills.begin(), spills.end(), is_unlocked));
}
// Sanity check: every host location must be completely empty.
void RegAlloc::AssertNoMoreUses() const {
    const auto is_empty = [](const auto& location) {
        return location.IsCompletelyEmpty();
    };

    ASSERT(std::all_of(gprs.begin(), gprs.end(), is_empty));
    ASSERT(std::all_of(fprs.begin(), fprs.end(), is_empty));
    ASSERT(is_empty(flags));
    ASSERT(std::all_of(spills.begin(), spills.end(), is_empty));
}
// Emits, for every IR value currently held in any host location, a call to
// PrintVerboseDebuggingOutputLine. Locations are visited in the order GPRs,
// FPRs, flags, spill slots.
void RegAlloc::EmitVerboseDebuggingOutput() {
// The callee address is kept in X19 for the whole sequence of calls.
code.MOV(X19, mcl::bit_cast<u64>(&PrintVerboseDebuggingOutputLine)); // Non-volatile register
// Emits one call per value held in `info`, passing in X0..X4:
// SP, the location type, the location index, the value's name and its type.
const auto do_location = [&](HostLocInfo& info, HostLocType type, size_t index) {
using namespace oaknut::util;
for (const IR::Inst* value : info.values) {
code.MOV(X0, SP);
code.MOV(X1, static_cast<u64>(type));
code.MOV(X2, index);
code.MOV(X3, value->GetName());
code.MOV(X4, static_cast<u64>(value->GetType()));
code.BLR(X19);
}
};
for (size_t i = 0; i < gprs.size(); i++) {
do_location(gprs[i], HostLocType::X, i);
}
for (size_t i = 0; i < fprs.size(); i++) {
do_location(fprs[i], HostLocType::Q, i);
}
do_location(flags, HostLocType::Nzcv, 0);
for (size_t i = 0; i < spills.size(); i++) {
do_location(spills[i], HostLocType::Spill, i);
}
}
// Materialises an immediate IR value into a freshly claimed scratch location
// of the requested kind and returns that location's index (always 0 for
// flags). U1 immediates are rejected by assertion. Other kinds fail to compile.
template<HostLoc::Kind kind>
int RegAlloc::GenerateImmediate(const IR::Value& value) {
ASSERT(value.GetType() != IR::Type::U1);
if constexpr (kind == HostLoc::Kind::Gpr) {
// Pick a GPR, evict whatever it holds, then load the constant into it.
const int new_location_index = AllocateRegister(gprs, gpr_order);
SpillGpr(new_location_index);
gprs[new_location_index].SetupScratchLocation();
code.MOV(oaknut::XReg{new_location_index}, value.GetImmediateAsU64());
return new_location_index;
} else if constexpr (kind == HostLoc::Kind::Fpr) {
const int new_location_index = AllocateRegister(fprs, fpr_order);
SpillFpr(new_location_index);
fprs[new_location_index].SetupScratchLocation();
// The constant goes through Xscratch0, then into the D view of the register.
code.MOV(Xscratch0, value.GetImmediateAsU64());
code.FMOV(oaknut::DReg{new_location_index}, Xscratch0);
return new_location_index;
} else if constexpr (kind == HostLoc::Kind::Flags) {
SpillFlags();
flags.SetupScratchLocation();
// The constant is written to NZCV via Xscratch0.
code.MOV(Xscratch0, value.GetImmediateAsU64());
code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
return 0;
} else {
static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
}
}
// Makes `value` readable from a location of `required_kind` and returns that
// location's index.
//
// Immediates are materialised with GenerateImmediate. A value already in a
// location of the right kind is marked realized and used in place. Otherwise
// a register of the required kind is allocated, its previous contents are
// spilled, code is emitted to move the value across, and the bookkeeping for
// the value is transferred from the old location to the new one.
template<HostLoc::Kind required_kind>
int RegAlloc::RealizeReadImpl(const IR::Value& value) {
if (value.IsImmediate()) {
return GenerateImmediate<required_kind>(value);
}
const auto current_location = ValueLocation(value.GetInst());
ASSERT(current_location);
if (current_location->kind == required_kind) {
ValueInfo(*current_location).realized = true;
return current_location->index;
}
// The value has to be moved: its source must be locked and not yet realized.
ASSERT(!ValueInfo(*current_location).realized);
ASSERT(ValueInfo(*current_location).locked);
if constexpr (required_kind == HostLoc::Kind::Gpr) {
const int new_location_index = AllocateRegister(gprs, gpr_order);
SpillGpr(new_location_index);
switch (current_location->kind) {
case HostLoc::Kind::Gpr:
// Unreachable: the same-kind case returned above.
ASSERT_FALSE("Logic error");
break;
case HostLoc::Kind::Fpr:
// Copies only the D (64-bit) view of the vector register.
code.FMOV(oaknut::XReg{new_location_index}, oaknut::DReg{current_location->index});
// ASSERT size fits
break;
case HostLoc::Kind::Spill:
code.LDR(oaknut::XReg{new_location_index}, SP, spill_offset + current_location->index * spill_slot_size);
break;
case HostLoc::Kind::Flags:
code.MRS(oaknut::XReg{new_location_index}, oaknut::SystemReg::NZCV);
break;
}
// Move the bookkeeping to the new register and reset the old location.
gprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
gprs[new_location_index].realized = true;
return new_location_index;
} else if constexpr (required_kind == HostLoc::Kind::Fpr) {
const int new_location_index = AllocateRegister(fprs, fpr_order);
SpillFpr(new_location_index);
switch (current_location->kind) {
case HostLoc::Kind::Gpr:
code.FMOV(oaknut::DReg{new_location_index}, oaknut::XReg{current_location->index});
break;
case HostLoc::Kind::Fpr:
// Unreachable: the same-kind case returned above.
ASSERT_FALSE("Logic error");
break;
case HostLoc::Kind::Spill:
// Reloads the full Q register from the spill slot.
code.LDR(oaknut::QReg{new_location_index}, SP, spill_offset + current_location->index * spill_slot_size);
break;
case HostLoc::Kind::Flags:
ASSERT_FALSE("Moving from flags into fprs is not currently supported");
break;
}
// Move the bookkeeping to the new register and reset the old location.
fprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
fprs[new_location_index].realized = true;
return new_location_index;
} else if constexpr (required_kind == HostLoc::Kind::Flags) {
// No return statement follows; this branch relies on the assertion firing.
ASSERT_FALSE("A simple read from flags is likely a logic error.");
} else {
static_assert(Common::always_false_v<mcl::mp::lift_value<required_kind>>);
}
}
/// Reserves a host location of `kind` to hold the result of `value` and returns its index.
/// `value` is recorded in `defined_insts` and must not already have a location.
template<HostLoc::Kind kind>
int RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
    defined_insts.insert(value);

    ASSERT(!ValueLocation(value));

    if constexpr (kind == HostLoc::Kind::Gpr) {
        // Evict the chosen GPR's current contents before handing it to `value`.
        const int gpr_index = AllocateRegister(gprs, gpr_order);
        SpillGpr(gpr_index);
        gprs[gpr_index].SetupLocation(value);
        return gpr_index;
    } else if constexpr (kind == HostLoc::Kind::Fpr) {
        // Evict the chosen FPR's current contents before handing it to `value`.
        const int fpr_index = AllocateRegister(fprs, fpr_order);
        SpillFpr(fpr_index);
        fprs[fpr_index].SetupLocation(value);
        return fpr_index;
    } else if constexpr (kind == HostLoc::Kind::Flags) {
        // There is only one flags location, so its index is always 0.
        SpillFlags();
        flags.SetupLocation(value);
        return 0;
    } else {
        static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
    }
}
/// Allocates a fresh location of `kind` for `write_value`, copies `read_value` into it and returns its index.
/// Flags are not supported here; the assertion message names this as the wrong function for them.
template<HostLoc::Kind kind>
int RegAlloc::RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* write_value) {
    defined_insts.insert(write_value);

    // TODO: Move elimination
    const int dest_index = RealizeWriteImpl<kind>(write_value);

    if constexpr (kind == HostLoc::Kind::Gpr) {
        const oaknut::XReg dest{dest_index};
        LoadCopyInto(read_value, dest);
        return dest_index;
    } else if constexpr (kind == HostLoc::Kind::Fpr) {
        const oaknut::QReg dest{dest_index};
        LoadCopyInto(read_value, dest);
        return dest_index;
    } else if constexpr (kind == HostLoc::Kind::Flags) {
        ASSERT_FALSE("Incorrect function for ReadWrite of flags");
    } else {
        static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
    }
}
// Explicit instantiations of the Realize*Impl templates for the Gpr, Fpr and Flags location kinds.
// The template bodies live in this translation unit, while RAReg<T>::Realize in the header calls them,
// so the instantiations must be emitted here.
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Gpr>(const IR::Value& value);
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Fpr>(const IR::Value& value);
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Flags>(const IR::Value& value);
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Gpr>(const IR::Inst* value);
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Fpr>(const IR::Inst* value);
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Flags>(const IR::Inst* value);
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Gpr>(const IR::Value&, const IR::Inst*);
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Fpr>(const IR::Value&, const IR::Inst*);
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Flags>(const IR::Value&, const IR::Inst*);
int RegAlloc::AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<int>& order) const {
const auto empty = std::find_if(order.begin(), order.end(), [&](int i) { return regs[i].IsCompletelyEmpty(); });
if (empty != order.end()) {
return *empty;
}
std::vector<int> candidates;
std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](int i) { return regs[i].MaybeAllocatable(); });
// TODO: LRU
std::uniform_int_distribution<size_t> dis{0, candidates.size() - 1};
return candidates[dis(rand_gen)];
}
/// Moves the contents of general-purpose register `index` into a free spill slot, if it holds any values.
/// The register must be neither locked nor realized.
void RegAlloc::SpillGpr(int index) {
    ASSERT(!gprs[index].locked && !gprs[index].realized);

    if (!gprs[index].values.empty()) {
        const int slot = FindFreeSpill();
        // Store the 64-bit register to the slot, then hand the bookkeeping over and clear the register's entry.
        code.STR(oaknut::XReg{index}, SP, spill_offset + slot * spill_slot_size);
        spills[slot] = std::exchange(gprs[index], {});
    }
}
/// Moves the contents of floating-point/vector register `index` into a free spill slot, if it holds any values.
/// The register must be neither locked nor realized.
void RegAlloc::SpillFpr(int index) {
    ASSERT(!fprs[index].locked && !fprs[index].realized);

    if (!fprs[index].values.empty()) {
        const int slot = FindFreeSpill();
        // Store the full Q register to the slot, then hand the bookkeeping over and clear the register's entry.
        code.STR(oaknut::QReg{index}, SP, spill_offset + slot * spill_slot_size);
        spills[slot] = std::exchange(fprs[index], {});
    }
}
/// Brings the flags value `read` into NZCV and, when `write` is non-null, sets up the flags location for `write`.
/// Any other value currently occupying the flags location is spilled first.
void RegAlloc::ReadWriteFlags(Argument& read, IR::Inst* write) {
    defined_insts.insert(write);

    const auto current_location = ValueLocation(read.value.GetInst());
    ASSERT(current_location);

    switch (current_location->kind) {
    case HostLoc::Kind::Flags:
        // Already in NZCV; spill unless IsOneRemainingUse() reports this is the last use.
        if (!flags.IsOneRemainingUse()) {
            SpillFlags();
        }
        break;
    case HostLoc::Kind::Gpr:
        if (!flags.values.empty()) {
            SpillFlags();
        }
        code.MSR(oaknut::SystemReg::NZCV, oaknut::XReg{current_location->index});
        break;
    case HostLoc::Kind::Spill:
        if (!flags.values.empty()) {
            SpillFlags();
        }
        // Reload the saved flags word from its spill slot through the scratch register.
        code.LDR(Wscratch0, SP, spill_offset + current_location->index * spill_slot_size);
        code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
        break;
    default:
        ASSERT_FALSE("Invalid current location for flags");
        break;
    }

    if (write) {
        flags.SetupLocation(write);
        flags.realized = false;
    }
}
void RegAlloc::SpillFlags() {
ASSERT(!flags.locked && !flags.realized);
if (flags.values.empty()) {
return;
}
const int new_location_index = AllocateRegister(gprs, gpr_order);
SpillGpr(new_location_index);
code.MRS(oaknut::XReg{new_location_index}, oaknut::SystemReg::NZCV);
gprs[new_location_index] = std::exchange(flags, {});
}
/// Returns the index of the first spill slot that holds no values; asserts if every slot is occupied.
int RegAlloc::FindFreeSpill() const {
    const auto slot_is_free = [](const HostLocInfo& info) {
        return info.values.empty();
    };

    const auto iter = std::find_if(spills.begin(), spills.end(), slot_is_free);
    ASSERT_MSG(iter != spills.end(), "All spill locations are full");

    return static_cast<int>(iter - spills.begin());
}
/// Emits code that copies `value` into the general-purpose register `reg`.
/// The value's own location and bookkeeping are left untouched.
void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::XReg reg) {
    if (value.IsImmediate()) {
        code.MOV(reg, value.GetImmediateAsU64());
        return;
    }

    const auto current_location = ValueLocation(value.GetInst());
    ASSERT(current_location);

    const int source_index = current_location->index;
    switch (current_location->kind) {
    case HostLoc::Kind::Gpr:
        code.MOV(reg, oaknut::XReg{source_index});
        break;
    case HostLoc::Kind::Fpr:
        code.FMOV(reg, oaknut::DReg{source_index});
        // ASSERT size fits
        break;
    case HostLoc::Kind::Spill:
        code.LDR(reg, SP, spill_offset + source_index * spill_slot_size);
        break;
    case HostLoc::Kind::Flags:
        code.MRS(reg, oaknut::SystemReg::NZCV);
        break;
    }
}
/// Emits code that copies `value` into the vector register `reg`.
/// Immediates and GPR sources are written through the D view of `reg`; a value located in flags is
/// not supported.
void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::QReg reg) {
    if (value.IsImmediate()) {
        // Build the immediate in the scratch GPR first, then transfer it across.
        code.MOV(Xscratch0, value.GetImmediateAsU64());
        code.FMOV(reg.toD(), Xscratch0);
        return;
    }

    const auto current_location = ValueLocation(value.GetInst());
    ASSERT(current_location);

    const int source_index = current_location->index;
    switch (current_location->kind) {
    case HostLoc::Kind::Gpr:
        code.FMOV(reg.toD(), oaknut::XReg{source_index});
        break;
    case HostLoc::Kind::Fpr:
        code.MOV(reg.B16(), oaknut::QReg{source_index}.B16());
        break;
    case HostLoc::Kind::Spill:
        // TODO: Minimize move size to max value width
        code.LDR(reg, SP, spill_offset + source_index * spill_slot_size);
        break;
    case HostLoc::Kind::Flags:
        ASSERT_FALSE("Moving from flags into fprs is not currently supported");
        break;
    }
}
/// Looks up where `value` currently lives.
/// Locations are searched in the order GPRs, FPRs, flags, spill slots; returns std::nullopt if none contains it.
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
    // Returns the index of the first entry of `locations` containing `value`, if any.
    const auto index_in = [value](const auto& locations) -> std::optional<int> {
        const auto iter = std::find_if(locations.begin(), locations.end(), [value](const HostLocInfo& info) { return info.Contains(value); });
        if (iter == locations.end()) {
            return std::nullopt;
        }
        return static_cast<int>(iter - locations.begin());
    };

    if (const auto gpr_index = index_in(gprs)) {
        return HostLoc{HostLoc::Kind::Gpr, *gpr_index};
    }
    if (const auto fpr_index = index_in(fprs)) {
        return HostLoc{HostLoc::Kind::Fpr, *fpr_index};
    }
    if (flags.Contains(value)) {
        return HostLoc{HostLoc::Kind::Flags, 0};
    }
    if (const auto spill_index = index_in(spills)) {
        return HostLoc{HostLoc::Kind::Spill, *spill_index};
    }
    return std::nullopt;
}
/// Returns the bookkeeping entry for the given host location.
/// `host_loc.index` is ignored for the flags location, of which there is only one.
HostLocInfo& RegAlloc::ValueInfo(HostLoc host_loc) {
    const auto slot = static_cast<size_t>(host_loc.index);

    switch (host_loc.kind) {
    case HostLoc::Kind::Gpr:
        return gprs[slot];
    case HostLoc::Kind::Fpr:
        return fprs[slot];
    case HostLoc::Kind::Flags:
        return flags;
    case HostLoc::Kind::Spill:
        return spills[slot];
    }

    ASSERT_FALSE("RegAlloc::ValueInfo: Invalid HostLoc::Kind");
}
/// Returns the bookkeeping entry of the location that currently contains `value`.
/// Locations are searched in the order GPRs, FPRs, flags, spill slots; asserts if the value is nowhere.
HostLocInfo& RegAlloc::ValueInfo(const IR::Inst* value) {
    // Returns a pointer to the first entry of `locations` containing `value`, or nullptr.
    const auto find_in = [value](auto& locations) -> HostLocInfo* {
        const auto iter = std::find_if(locations.begin(), locations.end(), [value](const HostLocInfo& info) { return info.Contains(value); });
        return iter != locations.end() ? &*iter : nullptr;
    };

    if (HostLocInfo* const in_gpr = find_in(gprs)) {
        return *in_gpr;
    }
    if (HostLocInfo* const in_fpr = find_in(fprs)) {
        return *in_fpr;
    }
    if (flags.Contains(value)) {
        return flags;
    }
    if (HostLocInfo* const in_spill = find_in(spills)) {
        return *in_spill;
    }

    ASSERT_FALSE("RegAlloc::ValueInfo: Value not found");
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,379 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include <optional>
#include <random>
#include <utility>
#include <vector>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include <mcl/type_traits/is_instance_of_template.hpp>
#include <oaknut/oaknut.hpp>
#include <ankerl/unordered_dense.h>
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/ir/cond.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Backend::Arm64 {
class FpsrManager;
class RegAlloc;
// Identifies one host storage location tracked by the register allocator:
// a location kind plus an index within that kind.
struct HostLoc final {
enum class Kind {
Gpr,    // general-purpose register; index selects an entry of RegAlloc::gprs
Fpr,    // floating-point/vector register; index selects an entry of RegAlloc::fprs
Flags,  // the single flags location (RegAlloc uses index 0 for it)
Spill,  // stack spill slot; index selects an entry of RegAlloc::spills
} kind;
int index;
};
// How an RAReg accesses its register. RAReg::Realize dispatches on this value.
enum RWType {
Void,       // no access; RAReg's move constructor leaves the moved-from object in this state
Read,       // realized through RegAlloc::RealizeReadImpl
Write,      // realized through RegAlloc::RealizeWriteImpl
ReadWrite,  // realized through RegAlloc::RealizeReadWriteImpl
};
// One argument of an IR instruction as seen by the register allocator.
// Only RegAlloc can construct instances (private constructor, RegAlloc is a friend);
// it hands them out as an ArgumentInfo array from RegAlloc::GetArgumentInfo.
struct Argument final {
public:
// Lets an Argument be passed inside std::optional, as RegAlloc::PrepareForCall does.
using copyable_reference = std::reference_wrapper<Argument>;
IR::Type GetType() const;
bool IsVoid() const { return GetType() == IR::Type::Void; }
bool IsImmediate() const;
// Accessors for the immediate value, one per width/type.
// NOTE(review): presumably only meaningful when IsImmediate() is true — confirm in the definitions.
bool GetImmediateU1() const;
u8 GetImmediateU8() const;
u16 GetImmediateU16() const;
u32 GetImmediateU32() const;
u64 GetImmediateU64() const;
IR::Cond GetImmediateCond() const;
IR::AccType GetImmediateAccType() const;
// Only valid if not immediate
HostLoc::Kind CurrentLocationKind() const;
// Both helpers check IsImmediate() first, so they are safe to call on immediates.
bool IsInGpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Gpr; }
bool IsInFpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Fpr; }
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc)
: reg_alloc{reg_alloc} {}
bool allocated = false;
RegAlloc& reg_alloc;
// The IR value this argument refers to; RegAlloc reads it directly (e.g. in ReadX or ReadWriteFlags).
IR::Value value;
};
// Tag type used as the T of RAReg<T> to select HostLoc::Kind::Flags.
// It mimics the parts of a register type that RAReg needs: construction from an int and an index(),
// which is always 0. Everything is private; only RAReg (a friend) can use it.
struct FlagsTag final {
private:
template<typename>
friend struct RAReg;
explicit FlagsTag(int) {}
int index() const { return 0; }
};
// Handle for a register (or the flags) requested from RegAlloc for reading and/or writing.
// A handle is created by RegAlloc's Read*/Write*/ReadWrite* helpers and is not usable until
// RegAlloc::Realize has been called on it: `reg` is empty before that, and the accessors use
// std::optional::value(). The destructor releases the lock taken by the constructor and clears
// the `realized` mark of the location.
template<typename T>
struct RAReg final {
public:
// Location kind implied by T: FlagsTag -> Flags, types derived from oaknut::VReg -> Fpr, otherwise Gpr.
static constexpr HostLoc::Kind kind = !std::is_same_v<FlagsTag, T>
? std::is_base_of_v<oaknut::VReg, T>
? HostLoc::Kind::Fpr
: HostLoc::Kind::Gpr
: HostLoc::Kind::Flags;
// Implicit conversions to the underlying register type.
operator T() const { return reg.value(); }
// Additional conversions available only for WReg / XReg handles.
operator oaknut::WRegWsp() const
requires(std::is_same_v<T, oaknut::WReg>)
{
return reg.value();
}
operator oaknut::XRegSp() const
requires(std::is_same_v<T, oaknut::XReg>)
{
return reg.value();
}
T operator*() const { return reg.value(); }
const T* operator->() const { return &reg.value(); }
~RAReg();
// Move construction transfers the request and resets `other` (rw = Void, no register),
// so only the new object's destructor touches the realized location.
RAReg(RAReg&& other)
: reg_alloc{other.reg_alloc}
, rw{std::exchange(other.rw, RWType::Void)}
, read_value{std::exchange(other.read_value, {})}
, write_value{std::exchange(other.write_value, nullptr)}
, reg{std::exchange(other.reg, std::nullopt)} {
}
RAReg& operator=(RAReg&&) = delete;
private:
friend class RegAlloc;
explicit RAReg(RegAlloc& reg_alloc, RWType rw, const IR::Value& read_value, const IR::Inst* write_value);
RAReg(const RAReg&) = delete;
RAReg& operator=(const RAReg&) = delete;
// Asks RegAlloc for the actual register according to `rw` and stores it in `reg`.
void Realize();
RegAlloc& reg_alloc;
RWType rw;
// Value to read (used for Read and ReadWrite).
IR::Value read_value;
// Instruction whose result is written (used for Write and ReadWrite).
const IR::Inst* write_value;
// Empty until Realize() has run.
std::optional<T> reg;
};
// Bookkeeping for a single host location (one GPR, one FPR, the flags, or one spill slot).
struct HostLocInfo final {
// IR instructions whose value currently lives in this location; empty means nothing is stored here.
std::vector<const IR::Inst*> values;
// Lock count: incremented by RAReg's constructor and decremented by its destructor for the value being read.
size_t locked = 0;
// Set while an RAReg has this location realized; cleared again by RAReg's destructor.
bool realized = false;
// Use counters. NOTE(review): their exact semantics are defined by UpdateUses()/IsOneRemainingUse(),
// whose definitions are not in this header — confirm before relying on them.
size_t uses_this_inst = 0;
size_t accumulated_uses = 0;
size_t expected_uses = 0;
bool Contains(const IR::Inst*) const;
void SetupScratchLocation();
void SetupLocation(const IR::Inst*);
bool IsCompletelyEmpty() const;
bool MaybeAllocatable() const;
bool IsOneRemainingUse() const;
void UpdateUses();
};
/// Register allocator of the Arm64 backend.
///
/// It tracks which IR values live in which host location (32 GPRs, 32 FPRs, the flags and
/// `SpillCount` spill slots) and hands out RAReg handles. A handle only names a request; the actual
/// register is chosen, and any required move/spill code is emitted, when Realize() is called on it.
class RegAlloc final {
public:
    using ArgumentInfo = std::array<Argument, IR::max_arg_count>;

    /// @param code         Code generator that all emitted instructions go to.
    /// @param fpsr_manager Stored by reference for use by the allocator.
    /// @param gpr_order    GPR indices that may be allocated, in order of preference.
    /// @param fpr_order    FPR indices that may be allocated, in order of preference.
    explicit RegAlloc(oaknut::CodeGenerator& code, FpsrManager& fpsr_manager, std::vector<int> gpr_order, std::vector<int> fpr_order)
            // The order vectors arrive by value, so move them into the members rather than copying them again.
            : code{code}, fpsr_manager{fpsr_manager}, gpr_order{std::move(gpr_order)}, fpr_order{std::move(fpr_order)}, rand_gen{std::random_device{}()} {}

    ArgumentInfo GetArgumentInfo(IR::Inst* inst);
    bool WasValueDefined(IR::Inst* inst) const;

    // Read handles: request `arg` in a register of the given width/class.
    auto ReadX(Argument& arg) { return RAReg<oaknut::XReg>{*this, RWType::Read, arg.value, nullptr}; }
    auto ReadW(Argument& arg) { return RAReg<oaknut::WReg>{*this, RWType::Read, arg.value, nullptr}; }

    auto ReadQ(Argument& arg) { return RAReg<oaknut::QReg>{*this, RWType::Read, arg.value, nullptr}; }
    auto ReadD(Argument& arg) { return RAReg<oaknut::DReg>{*this, RWType::Read, arg.value, nullptr}; }
    auto ReadS(Argument& arg) { return RAReg<oaknut::SReg>{*this, RWType::Read, arg.value, nullptr}; }
    auto ReadH(Argument& arg) { return RAReg<oaknut::HReg>{*this, RWType::Read, arg.value, nullptr}; }
    auto ReadB(Argument& arg) { return RAReg<oaknut::BReg>{*this, RWType::Read, arg.value, nullptr}; }

    /// Size-dispatching read of a general-purpose register; `size` is the width in bits (64 or 32).
    template<size_t size>
    auto ReadReg(Argument& arg) {
        if constexpr (size == 64) {
            return ReadX(arg);
        } else if constexpr (size == 32) {
            return ReadW(arg);
        } else {
            ASSERT_FALSE("Invalid size to ReadReg {}", size);
        }
    }

    /// Size-dispatching read of a vector register; `size` is the width in bits (128, 64, 32, 16 or 8).
    template<size_t size>
    auto ReadVec(Argument& arg) {
        if constexpr (size == 128) {
            return ReadQ(arg);
        } else if constexpr (size == 64) {
            return ReadD(arg);
        } else if constexpr (size == 32) {
            return ReadS(arg);
        } else if constexpr (size == 16) {
            return ReadH(arg);
        } else if constexpr (size == 8) {
            return ReadB(arg);
        } else {
            ASSERT_FALSE("Invalid size to ReadVec {}", size);
        }
    }

    // Write handles: request a register of the given width/class to hold the result of `inst`.
    auto WriteX(IR::Inst* inst) { return RAReg<oaknut::XReg>{*this, RWType::Write, {}, inst}; }
    auto WriteW(IR::Inst* inst) { return RAReg<oaknut::WReg>{*this, RWType::Write, {}, inst}; }

    auto WriteQ(IR::Inst* inst) { return RAReg<oaknut::QReg>{*this, RWType::Write, {}, inst}; }
    auto WriteD(IR::Inst* inst) { return RAReg<oaknut::DReg>{*this, RWType::Write, {}, inst}; }
    auto WriteS(IR::Inst* inst) { return RAReg<oaknut::SReg>{*this, RWType::Write, {}, inst}; }
    auto WriteH(IR::Inst* inst) { return RAReg<oaknut::HReg>{*this, RWType::Write, {}, inst}; }
    auto WriteB(IR::Inst* inst) { return RAReg<oaknut::BReg>{*this, RWType::Write, {}, inst}; }

    auto WriteFlags(IR::Inst* inst) { return RAReg<FlagsTag>{*this, RWType::Write, {}, inst}; }

    /// Size-dispatching write of a general-purpose register; `size` is the width in bits (64 or 32).
    template<size_t size>
    auto WriteReg(IR::Inst* inst) {
        if constexpr (size == 64) {
            return WriteX(inst);
        } else if constexpr (size == 32) {
            return WriteW(inst);
        } else {
            ASSERT_FALSE("Invalid size to WriteReg {}", size);
        }
    }

    /// Size-dispatching write of a vector register; `size` is the width in bits (128, 64, 32, 16 or 8).
    template<size_t size>
    auto WriteVec(IR::Inst* inst) {
        if constexpr (size == 128) {
            return WriteQ(inst);
        } else if constexpr (size == 64) {
            return WriteD(inst);
        } else if constexpr (size == 32) {
            return WriteS(inst);
        } else if constexpr (size == 16) {
            return WriteH(inst);
        } else if constexpr (size == 8) {
            return WriteB(inst);
        } else {
            ASSERT_FALSE("Invalid size to WriteVec {}", size);
        }
    }

    // Read-write handles: request a fresh register for `inst` that is pre-loaded with the value of `arg`.
    auto ReadWriteX(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::XReg>{*this, RWType::ReadWrite, arg.value, inst}; }
    auto ReadWriteW(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::WReg>{*this, RWType::ReadWrite, arg.value, inst}; }

    auto ReadWriteQ(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::QReg>{*this, RWType::ReadWrite, arg.value, inst}; }
    auto ReadWriteD(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::DReg>{*this, RWType::ReadWrite, arg.value, inst}; }
    auto ReadWriteS(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::SReg>{*this, RWType::ReadWrite, arg.value, inst}; }
    auto ReadWriteH(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::HReg>{*this, RWType::ReadWrite, arg.value, inst}; }
    auto ReadWriteB(Argument& arg, const IR::Inst* inst) { return RAReg<oaknut::BReg>{*this, RWType::ReadWrite, arg.value, inst}; }

    /// Size-dispatching read-write of a general-purpose register; `size` is the width in bits (64 or 32).
    template<size_t size>
    auto ReadWriteReg(Argument& arg, const IR::Inst* inst) {
        if constexpr (size == 64) {
            return ReadWriteX(arg, inst);
        } else if constexpr (size == 32) {
            return ReadWriteW(arg, inst);
        } else {
            ASSERT_FALSE("Invalid size to ReadWriteReg {}", size);
        }
    }

    /// Size-dispatching read-write of a vector register; `size` is the width in bits (128, 64, 32, 16 or 8).
    template<size_t size>
    auto ReadWriteVec(Argument& arg, const IR::Inst* inst) {
        if constexpr (size == 128) {
            return ReadWriteQ(arg, inst);
        } else if constexpr (size == 64) {
            return ReadWriteD(arg, inst);
        } else if constexpr (size == 32) {
            return ReadWriteS(arg, inst);
        } else if constexpr (size == 16) {
            return ReadWriteH(arg, inst);
        } else if constexpr (size == 8) {
            return ReadWriteB(arg, inst);
        } else {
            ASSERT_FALSE("Invalid size to ReadWriteVec {}", size);
        }
    }

    void PrepareForCall(std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}, std::optional<Argument::copyable_reference> arg2 = {}, std::optional<Argument::copyable_reference> arg3 = {});

    void DefineAsExisting(IR::Inst* inst, Argument& arg);
    void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg);

    /// Brings the flags value `read` into NZCV and, if `write` is non-null, sets the flags location up for `write`.
    void ReadWriteFlags(Argument& read, IR::Inst* write);
    /// Saves the value held in the flags location (if any) into a general-purpose register.
    void SpillFlags();
    void SpillAll();

    /// Realizes every given RAReg handle, in argument order.
    template<typename... Ts>
    static void Realize(Ts&... rs) {
        static_assert((mcl::is_instance_of_template<RAReg, Ts>() && ...));
        (rs.Realize(), ...);
    }

    void UpdateAllUses();
    void AssertAllUnlocked() const;
    void AssertNoMoreUses() const;

    void EmitVerboseDebuggingOutput();

private:
    friend struct Argument;
    template<typename>
    friend struct RAReg;

    template<HostLoc::Kind kind>
    int GenerateImmediate(const IR::Value& value);
    template<HostLoc::Kind kind>
    int RealizeReadImpl(const IR::Value& value);
    template<HostLoc::Kind kind>
    int RealizeWriteImpl(const IR::Inst* value);
    template<HostLoc::Kind kind>
    int RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* write_value);

    int AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<int>& order) const;
    void SpillGpr(int index);
    void SpillFpr(int index);
    int FindFreeSpill() const;

    void LoadCopyInto(const IR::Value& value, oaknut::XReg reg);
    void LoadCopyInto(const IR::Value& value, oaknut::QReg reg);

    std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
    HostLocInfo& ValueInfo(HostLoc host_loc);
    HostLocInfo& ValueInfo(const IR::Inst* value);

    oaknut::CodeGenerator& code;
    FpsrManager& fpsr_manager;
    std::vector<int> gpr_order;
    std::vector<int> fpr_order;

    // Per-location bookkeeping, indexed by register number / spill slot.
    std::array<HostLocInfo, 32> gprs;
    std::array<HostLocInfo, 32> fprs;
    HostLocInfo flags;
    std::array<HostLocInfo, SpillCount> spills;

    // Mutable because AllocateRegister is const but draws a random eviction candidate.
    mutable std::mt19937 rand_gen;

    // Instructions for which a write location has been requested.
    ankerl::unordered_dense::set<const IR::Inst*> defined_insts;
};
/// Records the request and, when an existing (non-immediate) value is going to be read,
/// locks that value's current location so it stays put until this handle is destroyed.
template<typename T>
RAReg<T>::RAReg(RegAlloc& reg_alloc, RWType rw, const IR::Value& read_value, const IR::Inst* write_value)
        : reg_alloc{reg_alloc}
        , rw{rw}
        , read_value{read_value}
        , write_value{write_value} {
    const bool reads_existing_value = rw != RWType::Write && !read_value.IsImmediate();
    if (reads_existing_value) {
        HostLocInfo& source_info = reg_alloc.ValueInfo(read_value.GetInst());
        ++source_info.locked;
    }
}
/// Undoes the constructor's lock on the read value (if one was taken) and, if the handle was
/// realized, clears the `realized` mark on its location.
template<typename T>
RAReg<T>::~RAReg() {
    const bool reads_existing_value = rw != RWType::Write && !read_value.IsImmediate();
    if (reads_existing_value) {
        HostLocInfo& source_info = reg_alloc.ValueInfo(read_value.GetInst());
        --source_info.locked;
    }

    if (reg.has_value()) {
        const HostLoc realized_location{kind, reg->index()};
        reg_alloc.ValueInfo(realized_location).realized = false;
    }
}
/// Obtains the actual register from the allocator according to the requested access type and
/// stores it in `reg`. Any access type other than Read, Write or ReadWrite is a logic error.
template<typename T>
void RAReg<T>::Realize() {
    if (rw == RWType::Read) {
        reg = T{reg_alloc.RealizeReadImpl<kind>(read_value)};
    } else if (rw == RWType::Write) {
        reg = T{reg_alloc.RealizeWriteImpl<kind>(write_value)};
    } else if (rw == RWType::ReadWrite) {
        reg = T{reg_alloc.RealizeReadWriteImpl<kind>(read_value, write_value)};
    } else {
        ASSERT_FALSE("Invalid RWType");
    }
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,52 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::Arm64 {
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif
// Number of spill slots: sizes both StackLayout::spill below and the register allocator's spill bookkeeping array.
constexpr size_t SpillCount = 64;
// One entry of the `rsb` array in StackLayout: a pair of 64-bit values, 16-byte aligned.
// NOTE(review): presumably maps a guest return target to the host code pointer for it — confirm against the emitter.
struct alignas(16) RSBEntry {
u64 target;
u64 code_ptr;
};
// Number of entries in StackLayout::rsb.
constexpr size_t RSBCount = 8;
// Mask expressed in bytes rather than entries: (RSBCount - 1) scaled by the entry size.
// NOTE(review): this is only a contiguous bit mask while RSBCount and sizeof(RSBEntry) are powers of two;
// presumably used to wrap rsb_ptr as a byte offset — confirm at the use sites.
constexpr u64 RSBIndexMask = (RSBCount - 1) * sizeof(RSBEntry);
// Layout of the backend's stack frame data. The struct is 16-byte aligned and a static_assert
// after it checks that its size is a multiple of 16. Member order determines the offsets used by emitted code
// (e.g. offsetof(StackLayout, spill)), so do not reorder.
struct alignas(16) StackLayout {
std::array<RSBEntry, RSBCount> rsb;
// SpillCount slots of 16 bytes each, so a slot can hold a full 128-bit vector register.
std::array<std::array<u64, 2>, SpillCount> spill;
u32 rsb_ptr;
s64 cycles_to_run;
u32 save_host_fpcr;
bool check_bit;
};
#ifdef _MSC_VER
# pragma warning(pop)
#endif
static_assert(sizeof(StackLayout) % 16 == 0);
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,108 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2023 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/verbose_debugging_output.h"
#include <fmt/format.h>
#include <oaknut/oaknut.hpp>
#include "dynarmic/backend/arm64/emit_context.h"
#include "dynarmic/ir/type.h"
namespace Dynarmic::Backend::Arm64 {
using namespace oaknut::util;
/// Emits code that saves the host register state into a RegisterData block pushed on the stack,
/// lets the register allocator emit its verbose debugging output, and then restores everything.
/// X18 is skipped in both directions (platform register); X0 is used as scratch after it has been saved.
void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) {
    constexpr int platform_register = 18;
    // Byte offsets of the i-th saved X / Q register inside RegisterData.
    const auto x_slot = [](int i) { return offsetof(RegisterData, x) + i * sizeof(u64); };
    const auto q_slot = [](int i) { return offsetof(RegisterData, q) + i * sizeof(Vector); };

    // Reserve the save area.
    code.SUB(SP, SP, sizeof(RegisterData));

    // Save general-purpose and vector registers.
    for (int i = 0; i < 30; ++i) {
        if (i != platform_register) {
            code.STR(oaknut::XReg{i}, SP, x_slot(i));
        }
    }
    for (int i = 0; i < 32; ++i) {
        code.STR(oaknut::QReg{i}, SP, q_slot(i));
    }

    // Save NZCV, a pointer to the spill area of the StackLayout above the save area, and FPSR.
    code.MRS(X0, oaknut::SystemReg::NZCV);
    code.STR(X0, SP, offsetof(RegisterData, nzcv));
    code.ADD(X0, SP, sizeof(RegisterData) + offsetof(StackLayout, spill));
    code.STR(X0, SP, offsetof(RegisterData, spill));
    code.MRS(X0, oaknut::SystemReg::FPSR);
    code.STR(X0, SP, offsetof(RegisterData, fpsr));

    ctx.reg_alloc.EmitVerboseDebuggingOutput();

    // Restore in the reverse order: status registers first, then vector and general-purpose registers.
    code.LDR(X0, SP, offsetof(RegisterData, fpsr));
    code.MSR(oaknut::SystemReg::FPSR, X0);
    code.LDR(X0, SP, offsetof(RegisterData, nzcv));
    code.MSR(oaknut::SystemReg::NZCV, X0);
    for (int i = 0; i < 32; ++i) {
        code.LDR(oaknut::QReg{i}, SP, q_slot(i));
    }
    for (int i = 0; i < 30; ++i) {
        if (i != platform_register) {
            code.LDR(oaknut::XReg{i}, SP, x_slot(i));
        }
    }

    // Release the save area.
    code.ADD(SP, SP, sizeof(RegisterData));
}
/// Prints one line of the form "dynarmic debug: %<inst_index> = <hex value>".
/// The value is fetched from the saved register state according to `reg_type`/`reg_index` and
/// formatted with a width that depends on `inst_type`.
void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLocType reg_type, size_t reg_index, size_t inst_index, IR::Type inst_type) {
    fmt::print("dynarmic debug: %{:05} = ", inst_index);

    // Reads the raw 128-bit contents of the requested location; 64-bit sources fill only the low half.
    const auto fetch_value = [&]() -> Vector {
        switch (reg_type) {
        case HostLocType::X:
            return {reg_data.x[reg_index], 0};
        case HostLocType::Q:
            return reg_data.q[reg_index];
        case HostLocType::Nzcv:
            return {reg_data.nzcv, 0};
        case HostLocType::Spill:
            return (*reg_data.spill)[reg_index];
        }
        fmt::print("invalid reg_type! ");
        return {0, 0};
    };
    const Vector raw = fetch_value();
    const u64 low = raw[0];
    const u64 high = raw[1];

    switch (inst_type) {
    case IR::Type::U1:
    case IR::Type::U8:
        fmt::print("{:02x}", low & 0xff);
        break;
    case IR::Type::U16:
        fmt::print("{:04x}", low & 0xffff);
        break;
    case IR::Type::U32:
    case IR::Type::NZCVFlags:
        fmt::print("{:08x}", low & 0xffffffff);
        break;
    case IR::Type::U64:
        fmt::print("{:016x}", low);
        break;
    case IR::Type::U128:
        fmt::print("{:016x}{:016x}", high, low);
        break;
    default:
        // Every remaining type (A32Reg, A32ExtReg, A64Reg, A64Vec, CoprocInfo, Cond, Void, Table,
        // AccType, Opaque, ...) has no printable value representation here.
        fmt::print("invalid inst_type!");
        break;
    }

    fmt::print("\n");
}
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,59 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2023 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include "dynarmic/common/common_types.h"
#include "dynarmic/backend/arm64/stack_layout.h"
namespace oaknut {
struct CodeGenerator;
struct Label;
} // namespace oaknut
namespace Dynarmic::IR {
enum class Type : u16;
} // namespace Dynarmic::IR
namespace Dynarmic::Backend::Arm64 {
struct EmitContext;
using Vector = std::array<u64, 2>;
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable : 4324) // Structure was padded due to alignment specifier
#endif
// Selects which part of RegisterData a debug line reads its value from
// (see PrintVerboseDebuggingOutputLine).
enum class HostLocType {
X,      // RegisterData::x[reg_index]
Q,      // RegisterData::q[reg_index]
Nzcv,   // RegisterData::nzcv
Spill,  // (*RegisterData::spill)[reg_index]
};
// Snapshot of host state written to the stack by the code from EmitVerboseDebuggingOutput.
// The emitted code addresses members via offsetof, so the member order and types are part of the contract.
struct alignas(16) RegisterData {
// X0..X29; the slot for X18 is never written by the emitter.
std::array<u64, 30> x;
// Q0..Q31.
std::array<Vector, 32> q;
u32 nzcv;
// Points at the spill area of the StackLayout that sits above this block on the stack.
decltype(StackLayout::spill)* spill;
u32 fpsr;
};
#ifdef _MSC_VER
# pragma warning(pop)
#endif
// Emits code that saves the host register state into a RegisterData block on the stack, lets the
// register allocator emit its debugging output, and restores the state afterwards.
void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx);
// Prints one debug line for IR instruction `inst_index`, reading the value from `reg_data` at the
// location given by `reg_type`/`reg_index` and formatting it according to `inst_type`.
void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLocType reg_type, size_t reg_index, size_t inst_index, IR::Type inst_type);
} // namespace Dynarmic::Backend::Arm64

View file

@ -0,0 +1,46 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/block_range_information.h"
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include "dynarmic/common/common_types.h"
#include <ankerl/unordered_dense.h>
namespace Dynarmic::Backend {
/// Associates `location` with every address in `range`.
template<typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
    // The interval map stores a set of descriptors per interval, so wrap the single descriptor in a set.
    std::set<IR::LocationDescriptor> descriptors;
    descriptors.insert(location);
    block_ranges.add(std::make_pair(range, std::move(descriptors)));
}
/// Forgets every recorded range.
template<typename ProgramCounterType>
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
    this->block_ranges.clear();
}
/// Collects the location descriptors of all recorded ranges that `block_ranges.equal_range` reports
/// for any interval in `ranges`. The recorded ranges themselves are left in place.
template<typename ProgramCounterType>
ankerl::unordered_dense::set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
    ankerl::unordered_dense::set<IR::LocationDescriptor> erase_locations;

    for (auto invalidate_interval : ranges) {
        auto [segment, segments_end] = block_ranges.equal_range(invalidate_interval);
        while (segment != segments_end) {
            for (const auto& descriptor : segment->second) {
                erase_locations.insert(descriptor);
            }
            ++segment;
        }
    }

    // TODO: EFFICIENCY: Remove ranges that are to be erased.
    return erase_locations;
}
// Explicit instantiations: the member definitions live in this file, so the two program-counter
// widths that are used must be instantiated here.
template class BlockRangeInformation<u32>;
template class BlockRangeInformation<u64>;
} // namespace Dynarmic::Backend

View file

@ -0,0 +1,29 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <set>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#include <ankerl/unordered_dense.h>
#include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::Backend {
// Maps intervals of program-counter values to the location descriptors registered for them.
// Member functions are defined in the .cpp file, which explicitly instantiates the template for u32 and u64.
template<typename ProgramCounterType>
class BlockRangeInformation {
public:
// Associates `location` with every address in `range`.
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
// Removes all recorded ranges.
void ClearCache();
// Returns the descriptors of all recorded ranges matched by `ranges`; the recorded ranges are not removed.
ankerl::unordered_dense::set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);
private:
boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
};
} // namespace Dynarmic::Backend

View file

@ -0,0 +1,75 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <functional>
#include <memory>
#include <optional>
#include <mcl/macro/architecture.hpp>
#include "dynarmic/common/common_types.h"
#if defined(MCL_ARCHITECTURE_X86_64)
namespace Dynarmic::Backend::X64 {
class BlockOfCode;
} // namespace Dynarmic::Backend::X64
#elif defined(MCL_ARCHITECTURE_ARM64)
namespace oaknut {
class CodeBlock;
} // namespace oaknut
#elif defined(MCL_ARCHITECTURE_RISCV)
namespace Dynarmic::Backend::RV64 {
class CodeBlock;
} // namespace Dynarmic::Backend::RV64
#else
# error "Invalid architecture"
#endif
namespace Dynarmic::Backend {
// Result type of the fastmem callback passed to ExceptionHandler::SetFastmemCallback.
// Its fields are architecture specific; building for any other architecture is a hard error.
// NOTE(review): presumably the addresses tell the handler where to redirect execution after a fault —
// confirm in the platform-specific handlers.
#if defined(MCL_ARCHITECTURE_X86_64)
struct FakeCall {
u64 call_rip;
u64 ret_rip;
};
#elif defined(MCL_ARCHITECTURE_ARM64)
struct FakeCall {
u64 call_pc;
};
#elif defined(MCL_ARCHITECTURE_RISCV)
struct FakeCall {
};
#else
# error "Invalid architecture"
#endif
// Platform-independent front end of the backend's exception handler.
// The implementation is hidden behind `Impl` (pimpl), so each platform source file supplies its own.
class ExceptionHandler final {
public:
ExceptionHandler();
~ExceptionHandler();
// Registers the emitted-code region with the handler; the signature depends on the host architecture.
#if defined(MCL_ARCHITECTURE_X86_64)
void Register(X64::BlockOfCode& code);
#elif defined(MCL_ARCHITECTURE_ARM64)
void Register(oaknut::CodeBlock& mem, std::size_t mem_size);
#elif defined(MCL_ARCHITECTURE_RISCV)
void Register(RV64::CodeBlock& mem, std::size_t mem_size);
#else
# error "Invalid architecture"
#endif
// Whether this handler implementation supports fastmem.
bool SupportsFastmem() const noexcept;
// Installs the callback that maps a u64 argument to a FakeCall.
// NOTE(review): presumably the argument is the faulting host address — confirm in a platform implementation.
void SetFastmemCallback(std::function<FakeCall(u64)> cb);
private:
struct Impl;
std::unique_ptr<Impl> impl;
};
} // namespace Dynarmic::Backend

View file

@ -0,0 +1,40 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/exception_handler.h"
namespace Dynarmic::Backend {
// Generic (no-op) exception handler: there is no state to keep, so Impl is empty.
// It still has to be defined so that std::unique_ptr<Impl> can be destroyed.
struct ExceptionHandler::Impl final {
};
// The defaulted constructor leaves `impl` null; nothing in this file dereferences it.
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
// Registration is a no-op in the generic handler. One stub per supported architecture is provided
// so that the signature matches the declaration in exception_handler.h.
#if defined(MCL_ARCHITECTURE_X86_64)
void ExceptionHandler::Register(X64::BlockOfCode&) {
// Do nothing
}
#elif defined(MCL_ARCHITECTURE_ARM64)
void ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) {
// Do nothing
}
#elif defined(MCL_ARCHITECTURE_RISCV)
void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) {
// Do nothing
}
#else
# error "Invalid architecture"
#endif
/// The generic handler never supports fastmem.
bool ExceptionHandler::SupportsFastmem() const noexcept {
    constexpr bool fastmem_supported = false;
    return fastmem_supported;
}
/// Accepts and discards the callback: the generic handler never invokes it.
void ExceptionHandler::SetFastmemCallback([[maybe_unused]] std::function<FakeCall(u64)> cb) {
    // Do nothing
}
} // namespace Dynarmic::Backend

View file

@ -0,0 +1,296 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mach/mach.h>
#include <mach/message.h>
#include <cstring>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <vector>
#include <fmt/format.h>
#include "dynarmic/common/assert.h"
#include <mcl/bit_cast.hpp>
#include <mcl/macro/architecture.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/backend/exception_handler.h"
#if defined(MCL_ARCHITECTURE_X86_64)
# include "dynarmic/backend/x64/block_of_code.h"
# define mig_external extern "C"
# include "dynarmic/backend/x64/mig/mach_exc_server.h"
# define THREAD_STATE x86_THREAD_STATE64
# define THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
using dynarmic_thread_state_t = x86_thread_state64_t;
#elif defined(MCL_ARCHITECTURE_ARM64)
# include <oaknut/code_block.hpp>
# define mig_external extern "C"
# include "dynarmic/backend/arm64/mig/mach_exc_server.h"
# define THREAD_STATE ARM_THREAD_STATE64
# define THREAD_STATE_COUNT ARM_THREAD_STATE64_COUNT
using dynarmic_thread_state_t = arm_thread_state64_t;
#endif
namespace Dynarmic::Backend {
namespace {
// Describes one registered region of emitted code and the callback to use for faults inside it.
struct CodeBlockInfo {
// Half-open address range [code_begin, code_end), as tested by MachHandler::FindCodeBlockInfo.
u64 code_begin, code_end;
std::function<FakeCall(u64)> cb;
};
// Buffer for one Mach message: the standard header followed by space for the message body.
// MessagePump passes sizeof(MachMessage) to mach_msg as the receive limit.
struct MachMessage {
mach_msg_header_t head;
char data[2048]; ///< Arbitrary size
};
// Owns the Mach exception port for the task and the thread that services it.
// Registered code blocks are kept in `code_block_infos`.
class MachHandler final {
public:
MachHandler();
~MachHandler();
kern_return_t HandleRequest(dynarmic_thread_state_t* thread_state);
void AddCodeBlock(CodeBlockInfo info);
void RemoveCodeBlock(u64 rip);
private:
// Returns an iterator to the first registered block whose [code_begin, code_end) range contains `rip`,
// or code_block_infos.end() if there is none. Does not take the mutex itself.
auto FindCodeBlockInfo(u64 rip) {
return std::find_if(code_block_infos.begin(), code_block_infos.end(), [&](const auto& x) { return x.code_begin <= rip && x.code_end > rip; });
}
std::vector<CodeBlockInfo> code_block_infos;
// NOTE(review): presumably guards code_block_infos — confirm in AddCodeBlock/RemoveCodeBlock/HandleRequest.
std::mutex code_block_infos_mutex;
// Runs MessagePump; the constructor detaches it right after starting it.
std::thread thread;
// Receive port on which exception messages for this task arrive.
mach_port_t server_port;
void MessagePump();
};
// Allocates a receive port, inserts a send right for it, installs it as the
// task's EXC_BAD_ACCESS exception port (state-only messages with 64-bit codes),
// requests a port-destroyed notification, then starts MessagePump on a
// detached thread.
MachHandler::MachHandler() {
// KCHECK asserts that a mach call returned KERN_SUCCESS and reports the failing expression text.
#define KCHECK(x) ASSERT_MSG((x) == KERN_SUCCESS, "dynarmic: macOS MachHandler: init failure at {}", #x)
KCHECK(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server_port));
KCHECK(mach_port_insert_right(mach_task_self(), server_port, server_port, MACH_MSG_TYPE_MAKE_SEND));
KCHECK(task_set_exception_ports(mach_task_self(), EXC_MASK_BAD_ACCESS, server_port, EXCEPTION_STATE | MACH_EXCEPTION_CODES, THREAD_STATE));
// The below doesn't actually work, and I'm not sure why; since this doesn't work we'll have a spurious error message upon shutdown.
mach_port_t prev;
KCHECK(mach_port_request_notification(mach_task_self(), server_port, MACH_NOTIFY_PORT_DESTROYED, 0, server_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &prev));
#undef KCHECK
// The pump thread is detached, so it is never joined by the destructor.
thread = std::thread(&MachHandler::MessagePump, this);
thread.detach();
}
// Calls mach_port_deallocate on server_port; the return value is ignored.
MachHandler::~MachHandler() {
mach_port_deallocate(mach_task_self(), server_port);
}
void MachHandler::MessagePump() {
mach_msg_return_t mr;
MachMessage request;
MachMessage reply;
while (true) {
mr = mach_msg(&request.head, MACH_RCV_MSG | MACH_RCV_LARGE, 0, sizeof(request), server_port, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
if (mr != MACH_MSG_SUCCESS) {
fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to receive mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
return;
}
if (!mach_exc_server(&request.head, &reply.head)) {
fmt::print(stderr, "dynarmic: macOS MachHandler: Unexpected mach message\n");
return;
}
mr = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
if (mr != MACH_MSG_SUCCESS) {
fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to send mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
return;
}
}
}
#if defined(MCL_ARCHITECTURE_X86_64)
// x86-64: looks up the region containing the faulting rip. If none is found,
// logs and returns KERN_FAILURE. Otherwise asks the region's callback for a
// FakeCall, pushes fc.ret_rip onto the faulting thread's stack and sets rip to
// fc.call_rip, so the thread resumes as if it had called call_rip.
kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
const auto iter = FindCodeBlockInfo(ts->__rip);
if (iter == code_block_infos.end()) {
fmt::print(stderr, "Unhandled EXC_BAD_ACCESS at rip {:#016x}\n", ts->__rip);
return KERN_FAILURE;
}
FakeCall fc = iter->cb(ts->__rip);
// Emulate a call instruction: make room on the stack and store the return address there.
ts->__rsp -= sizeof(u64);
*mcl::bit_cast<u64*>(ts->__rsp) = fc.ret_rip;
ts->__rip = fc.call_rip;
return KERN_SUCCESS;
}
#elif defined(MCL_ARCHITECTURE_ARM64)
// ARM64: looks up the region containing the faulting pc. If none is found,
// logs and returns KERN_FAILURE. Otherwise sets pc to the callback's
// fc.call_pc; no other register is modified here.
kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) {
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
const auto iter = FindCodeBlockInfo(ts->__pc);
if (iter == code_block_infos.end()) {
fmt::print(stderr, "Unhandled EXC_BAD_ACCESS at pc {:#016x}\n", ts->__pc);
return KERN_FAILURE;
}
FakeCall fc = iter->cb(ts->__pc);
// TODO: Sign with ptrauth_sign_unauthenticated if pointer authentication is enabled.
ts->__pc = fc.call_pc;
return KERN_SUCCESS;
}
#endif
// Registers a code region. A region already containing cbi.code_begin is
// dropped first, so at most one entry is replaced per call.
void MachHandler::AddCodeBlock(CodeBlockInfo cbi) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
    const auto existing = FindCodeBlockInfo(cbi.code_begin);
    if (existing != code_block_infos.end()) {
        code_block_infos.erase(existing);
    }
    code_block_infos.push_back(cbi);
}
// Unregisters the region containing rip; does nothing when no region matches.
void MachHandler::RemoveCodeBlock(u64 rip) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
    if (const auto match = FindCodeBlockInfo(rip); match != code_block_infos.end()) {
        code_block_infos.erase(match);
    }
}
std::mutex handler_lock;
std::optional<MachHandler> mach_handler;
void RegisterHandler() {
std::lock_guard<std::mutex> guard(handler_lock);
if (!mach_handler) {
mach_handler.emplace();
}
}
} // anonymous namespace
// Entry point for plain mach_exception_raise messages. The exception port is
// registered with EXCEPTION_STATE behaviour, so this flavour is treated as
// unexpected: it is logged and rejected.
mig_external kern_return_t catch_mach_exception_raise(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t) {
fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise\n");
return KERN_FAILURE;
}
// Entry point for mach_exception_raise_state_identity messages; likewise
// treated as unexpected, logged and rejected.
mig_external kern_return_t catch_mach_exception_raise_state_identity(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, int*, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t*) {
fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise_state_identity\n");
return KERN_FAILURE;
}
// Entry point for mach_exception_raise_state messages, the flavour the
// exception port is registered for. Validates the arguments, copies the
// incoming thread state into a local, lets MachHandler::HandleRequest rewrite
// it, and copies the result into new_state.
// Returns KERN_INVALID_ARGUMENT for null pointers or an unexpected
// flavour/count, KERN_FAILURE for any exception other than EXC_BAD_ACCESS,
// and otherwise whatever HandleRequest returns.
mig_external kern_return_t catch_mach_exception_raise_state(
mach_port_t /*exception_port*/,
exception_type_t exception,
const mach_exception_data_t /*code*/, // code[0] is as per kern_return.h, code[1] is rip.
mach_msg_type_number_t /*codeCnt*/,
int* flavor,
const thread_state_t old_state,
mach_msg_type_number_t old_stateCnt,
thread_state_t new_state,
mach_msg_type_number_t* new_stateCnt) {
if (!flavor || !new_stateCnt) {
fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Invalid arguments.\n");
return KERN_INVALID_ARGUMENT;
}
// The output buffer must be able to hold a full thread state of the expected flavour.
if (*flavor != THREAD_STATE || old_stateCnt != THREAD_STATE_COUNT || *new_stateCnt < THREAD_STATE_COUNT) {
fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Unexpected flavor.\n");
return KERN_INVALID_ARGUMENT;
}
if (exception != EXC_BAD_ACCESS) {
fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Unexpected exception type.\n");
return KERN_FAILURE;
}
// The input/output pointers are not necessarily 8-byte aligned.
dynarmic_thread_state_t ts;
std::memcpy(&ts, old_state, sizeof(ts));
kern_return_t ret = mach_handler->HandleRequest(&ts);
// The state is written back regardless of ret.
std::memcpy(new_state, &ts, sizeof(ts));
*new_stateCnt = THREAD_STATE_COUNT;
return ret;
}
// Per-ExceptionHandler state on macOS: remembers one code range and keeps it
// registered with the shared MachHandler for as long as this object lives.
struct ExceptionHandler::Impl final {
    // Records the range and makes sure the shared MachHandler exists.
    Impl(u64 code_begin_, u64 code_end_)
            : code_begin(code_begin_), code_end(code_end_) {
        RegisterHandler();
    }

    // Unregisters the range that contains code_begin.
    ~Impl() {
        mach_handler->RemoveCodeBlock(code_begin);
    }

    // Registers (or re-registers) this range with cb as its fault callback.
    void SetCallback(std::function<FakeCall(u64)> cb) {
        mach_handler->AddCodeBlock(CodeBlockInfo{code_begin, code_end, cb});
    }

private:
    u64 code_begin, code_end;
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
#if defined(MCL_ARCHITECTURE_X86_64)
// Creates the Impl for the range [getCode(), getCode() + GetTotalCodeSize()).
// Any previously held Impl is replaced.
void ExceptionHandler::Register(X64::BlockOfCode& code) {
const u64 code_begin = mcl::bit_cast<u64>(code.getCode());
const u64 code_end = code_begin + code.GetTotalCodeSize();
impl = std::make_unique<Impl>(code_begin, code_end);
}
#elif defined(MCL_ARCHITECTURE_ARM64)
// Creates the Impl for the range [mem.ptr(), mem.ptr() + size).
// Any previously held Impl is replaced.
void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
const u64 code_begin = mcl::bit_cast<u64>(mem.ptr());
const u64 code_end = code_begin + size;
impl = std::make_unique<Impl>(code_begin, code_end);
}
#else
# error "Invalid architecture"
#endif
// Fastmem is reported as supported once Register has created an Impl.
bool ExceptionHandler::SupportsFastmem() const noexcept {
return static_cast<bool>(impl);
}
// Forwards the callback to the Impl. impl is dereferenced without a null
// check, so Register must have been called first.
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
impl->SetCallback(cb);
}
} // namespace Dynarmic::Backend

View file

@ -0,0 +1,14 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2023 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/macro/architecture.hpp>
#if defined(MCL_ARCHITECTURE_X86_64)
# include "dynarmic/backend/x64/mig/mach_exc_server.c"
#elif defined(MCL_ARCHITECTURE_ARM64)
# include "dynarmic/backend/arm64/mig/mach_exc_server.c"
#else
# error "Invalid architecture"
#endif

View file

@ -0,0 +1,342 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2019 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/exception_handler.h"
#ifdef __APPLE__
# include <signal.h>
# include <sys/ucontext.h>
#else
# include <signal.h>
# ifndef __OpenBSD__
# include <ucontext.h>
# endif
# ifdef __sun__
# include <sys/regset.h>
# endif
#endif
#include <cstring>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <vector>
#include "dynarmic/common/assert.h"
#include <mcl/bit_cast.hpp>
#include "dynarmic/common/common_types.h"
#if defined(MCL_ARCHITECTURE_X86_64)
# include "dynarmic/backend/x64/block_of_code.h"
#elif defined(MCL_ARCHITECTURE_ARM64)
# include <oaknut/code_block.hpp>
# include "dynarmic/backend/arm64/abi.h"
#elif defined(MCL_ARCHITECTURE_RISCV)
# include "dynarmic/backend/riscv64/code_block.h"
#else
# error "Invalid architecture"
#endif
namespace Dynarmic::Backend {
namespace {
// One registered region of generated code. Lookups treat the range as
// half-open: an address matches when code_begin <= address < code_end.
// cb is called with the faulting host pc and returns the FakeCall to apply.
struct CodeBlockInfo {
u64 code_begin, code_end;
std::function<FakeCall(u64)> cb;
};
// Installs the process signal handler(s) used for fastmem and keeps the list
// of code regions whose faults should be redirected through a callback.
class SigHandler {
public:
SigHandler();
~SigHandler();
// Registers a region, replacing any existing region that contains info.code_begin.
void AddCodeBlock(CodeBlockInfo info);
// Removes the region containing host_pc, if there is one.
void RemoveCodeBlock(u64 host_pc);
// False when the constructor failed to set up the alternate stack or a handler.
bool SupportsFastmem() const { return supports_fast_mem; }
private:
// Returns an iterator to the first region containing host_pc, or end() if none does.
// Does not lock; callers hold code_block_infos_mutex.
auto FindCodeBlockInfo(u64 host_pc) {
return std::find_if(code_block_infos.begin(), code_block_infos.end(), [&](const auto& x) { return x.code_begin <= host_pc && x.code_end > host_pc; });
}
bool supports_fast_mem = true;
// malloc'd memory handed to sigaltstack; freed in the destructor.
void* signal_stack_memory = nullptr;
std::vector<CodeBlockInfo> code_block_infos;
// Guards code_block_infos.
std::mutex code_block_infos_mutex;
// Previously installed actions, used by SigAction to forward unhandled signals.
struct sigaction old_sa_segv;
struct sigaction old_sa_bus;
static void SigAction(int sig, siginfo_t* info, void* raw_context);
};
std::mutex handler_lock;
std::optional<SigHandler> sig_handler;
void RegisterHandler() {
std::lock_guard<std::mutex> guard(handler_lock);
if (!sig_handler) {
sig_handler.emplace();
}
}
// Sets up an alternate signal stack and installs SigAction for SIGSEGV (and
// SIGBUS on Apple platforms), remembering the previous actions so unhandled
// signals can be forwarded. On any failure a message is printed,
// supports_fast_mem is cleared and construction stops early.
SigHandler::SigHandler() {
    // Use at least 2 MiB, or SIGSTKSZ if the platform asks for more.
    const size_t signal_stack_size = std::max<size_t>(SIGSTKSZ, 2 * 1024 * 1024);
    signal_stack_memory = std::malloc(signal_stack_size);
    if (signal_stack_memory == nullptr) {
        // Never hand a null stack to sigaltstack: the first fault would then
        // run the handler on an invalid stack.
        fmt::print(stderr, "dynarmic: POSIX SigHandler: init failure at malloc\n");
        supports_fast_mem = false;
        return;
    }

    stack_t signal_stack{};
    signal_stack.ss_sp = signal_stack_memory;
    signal_stack.ss_size = signal_stack_size;
    signal_stack.ss_flags = 0;
    if (sigaltstack(&signal_stack, nullptr) != 0) {
        fmt::print(stderr, "dynarmic: POSIX SigHandler: init failure at sigaltstack\n");
        supports_fast_mem = false;
        return;
    }

    // Value-initialise so that every field, including platform-specific ones,
    // has a defined value before the struct is passed to sigaction.
    struct sigaction sa{};
    sa.sa_sigaction = &SigHandler::SigAction;
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
    sigemptyset(&sa.sa_mask);
    if (sigaction(SIGSEGV, &sa, &old_sa_segv) != 0) {
        fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGSEGV handler\n");
        supports_fast_mem = false;
        return;
    }
#ifdef __APPLE__
    if (sigaction(SIGBUS, &sa, &old_sa_bus) != 0) {
        fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGBUS handler\n");
        supports_fast_mem = false;
        return;
    }
#endif
}
// Frees the alternate-stack memory. The installed signal actions are left in place.
SigHandler::~SigHandler() {
    std::free(signal_stack_memory);
}
// Registers a code region. A region already containing cbi.code_begin is
// dropped first, so at most one entry is replaced per call.
void SigHandler::AddCodeBlock(CodeBlockInfo cbi) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
    const auto existing = FindCodeBlockInfo(cbi.code_begin);
    if (existing != code_block_infos.end()) {
        code_block_infos.erase(existing);
    }
    code_block_infos.push_back(cbi);
}
// Unregisters the region containing host_pc; does nothing when no region matches.
void SigHandler::RemoveCodeBlock(u64 host_pc) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
    if (const auto match = FindCodeBlockInfo(host_pc); match != code_block_infos.end()) {
        code_block_infos.erase(match);
    }
}
// Signal handler installed for SIGSEGV (and SIGBUS on Apple platforms).
// If the faulting program counter lies inside a registered code region, the
// region's callback supplies a FakeCall and the saved context is rewritten so
// execution resumes at its call target. Otherwise the fault is logged and
// forwarded to the action that was installed before this handler.
// The CTX_* macros hide the per-OS layout of the saved register context.
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
ASSERT(sig == SIGSEGV || sig == SIGBUS);
#ifndef MCL_ARCHITECTURE_RISCV
ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context);
#ifndef __OpenBSD__
auto& mctx = ucontext->uc_mcontext;
#endif
#endif
#if defined(MCL_ARCHITECTURE_X86_64)
# if defined(__APPLE__)
# define CTX_RIP (mctx->__ss.__rip)
# define CTX_RSP (mctx->__ss.__rsp)
# elif defined(__linux__)
# define CTX_RIP (mctx.gregs[REG_RIP])
# define CTX_RSP (mctx.gregs[REG_RSP])
# elif defined(__FreeBSD__)
# define CTX_RIP (mctx.mc_rip)
# define CTX_RSP (mctx.mc_rsp)
# elif defined(__NetBSD__)
# define CTX_RIP (mctx.__gregs[_REG_RIP])
# define CTX_RSP (mctx.__gregs[_REG_RSP])
# elif defined(__OpenBSD__)
# define CTX_RIP (ucontext->sc_rip)
# define CTX_RSP (ucontext->sc_rsp)
# elif defined(__sun__)
# define CTX_RIP (mctx.gregs[REG_RIP])
# define CTX_RSP (mctx.gregs[REG_RSP])
# else
# error "Unknown platform"
# endif
{
std::lock_guard<std::mutex> guard(sig_handler->code_block_infos_mutex);
const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP);
if (iter != sig_handler->code_block_infos.end()) {
FakeCall fc = iter->cb(CTX_RIP);
// Emulate a call instruction: push the return address, then jump to the target.
CTX_RSP -= sizeof(u64);
*mcl::bit_cast<u64*>(CTX_RSP) = fc.ret_rip;
CTX_RIP = fc.call_rip;
return;
}
}
fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
#elif defined(MCL_ARCHITECTURE_ARM64)
# if defined(__APPLE__)
# define CTX_PC (mctx->__ss.__pc)
# define CTX_SP (mctx->__ss.__sp)
# define CTX_LR (mctx->__ss.__lr)
# define CTX_X(i) (mctx->__ss.__x[i])
# define CTX_Q(i) (mctx->__ns.__v[i])
# elif defined(__linux__)
# define CTX_PC (mctx.pc)
# define CTX_SP (mctx.sp)
# define CTX_LR (mctx.regs[30])
# define CTX_X(i) (mctx.regs[i])
# define CTX_Q(i) (fpctx->vregs[i])
// Walks the records in mctx.__reserved until the FPSIMD record is found.
[[maybe_unused]] const auto fpctx = [&mctx] {
_aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved;
while (header->magic != FPSIMD_MAGIC) {
ASSERT(header->magic && header->size);
header = (_aarch64_ctx*)((char*)header + header->size);
}
return (fpsimd_context*)header;
}();
# elif defined(__FreeBSD__)
# define CTX_PC (mctx.mc_gpregs.gp_elr)
# define CTX_SP (mctx.mc_gpregs.gp_sp)
# define CTX_LR (mctx.mc_gpregs.gp_lr)
# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
# elif defined(__NetBSD__)
# define CTX_PC (mctx.mc_gpregs.gp_elr)
# define CTX_SP (mctx.mc_gpregs.gp_sp)
# define CTX_LR (mctx.mc_gpregs.gp_lr)
# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
# elif defined(__OpenBSD__)
# define CTX_PC (ucontext->sc_elr)
# define CTX_SP (ucontext->sc_sp)
# define CTX_LR (ucontext->sc_lr)
# define CTX_X(i) (ucontext->sc_x[i])
# define CTX_Q(i) (ucontext->sc_q[i])
# else
# error "Unknown platform"
# endif
{
std::lock_guard<std::mutex> guard(sig_handler->code_block_infos_mutex);
const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC);
if (iter != sig_handler->code_block_infos.end()) {
FakeCall fc = iter->cb(CTX_PC);
// Only the program counter is redirected on ARM64.
CTX_PC = fc.call_pc;
return;
}
}
fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_PC);
#elif defined(MCL_ARCHITECTURE_RISCV)
ASSERT_FALSE("Unimplemented");
#else
# error "Invalid architecture"
#endif
// Not a fault in registered code: forward to the previously installed action for this signal.
struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler->old_sa_segv : &sig_handler->old_sa_bus;
if (retry_sa->sa_flags & SA_SIGINFO) {
retry_sa->sa_sigaction(sig, info, raw_context);
return;
}
if (retry_sa->sa_handler == SIG_DFL) {
// Restore the default disposition and return from the handler.
signal(sig, SIG_DFL);
return;
}
if (retry_sa->sa_handler == SIG_IGN) {
return;
}
retry_sa->sa_handler(sig);
}
} // anonymous namespace
// Per-ExceptionHandler state on POSIX: remembers one code range and keeps it
// registered with the shared SigHandler for as long as this object lives.
struct ExceptionHandler::Impl final {
    // Records the range and makes sure the shared SigHandler exists.
    Impl(u64 code_begin_, u64 code_end_)
            : code_begin(code_begin_), code_end(code_end_) {
        RegisterHandler();
    }

    // Unregisters the range that contains code_begin.
    ~Impl() {
        sig_handler->RemoveCodeBlock(code_begin);
    }

    // Registers (or re-registers) this range with cb as its fault callback.
    void SetCallback(std::function<FakeCall(u64)> cb) {
        sig_handler->AddCodeBlock(CodeBlockInfo{code_begin, code_end, cb});
    }

private:
    u64 code_begin, code_end;
};
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
#if defined(MCL_ARCHITECTURE_X86_64)
// Creates the Impl for the range [getCode(), getCode() + GetTotalCodeSize()).
// Any previously held Impl is replaced.
void ExceptionHandler::Register(X64::BlockOfCode& code) {
const u64 code_begin = mcl::bit_cast<u64>(code.getCode());
const u64 code_end = code_begin + code.GetTotalCodeSize();
impl = std::make_unique<Impl>(code_begin, code_end);
}
#elif defined(MCL_ARCHITECTURE_ARM64)
// Creates the Impl for the range [mem.ptr(), mem.ptr() + size).
// Any previously held Impl is replaced.
void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
const u64 code_begin = mcl::bit_cast<u64>(mem.ptr());
const u64 code_end = code_begin + size;
impl = std::make_unique<Impl>(code_begin, code_end);
}
#elif defined(MCL_ARCHITECTURE_RISCV)
// Creates the Impl for the range [mem.ptr<u64>(), mem.ptr<u64>() + size).
// Any previously held Impl is replaced.
void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) {
const u64 code_begin = mcl::bit_cast<u64>(mem.ptr<u64>());
const u64 code_end = code_begin + size;
impl = std::make_unique<Impl>(code_begin, code_end);
}
#else
# error "Invalid architecture"
#endif
// Fastmem is supported only when Register has created an Impl and the shared
// SigHandler finished its setup without error. The && short-circuits, so
// sig_handler is only dereferenced after an Impl (and therefore the handler) exists.
bool ExceptionHandler::SupportsFastmem() const noexcept {
return static_cast<bool>(impl) && sig_handler->SupportsFastmem();
}
// Forwards the callback to the Impl. impl is dereferenced without a null
// check, so Register must have been called first.
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
impl->SetCallback(cb);
}
} // namespace Dynarmic::Backend

View file

@ -0,0 +1,14 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2023 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/macro/architecture.hpp>
#if defined(MCL_ARCHITECTURE_X86_64)
# include "dynarmic/backend/x64/exception_handler_windows.cpp"
#elif defined(MCL_ARCHITECTURE_ARM64)
# include "dynarmic/backend/exception_handler_generic.cpp"
#else
# error "Invalid architecture"
#endif

View file

@ -0,0 +1,148 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/riscv64/a32_address_space.h"
#include "dynarmic/common/assert.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/stack_layout.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Backend::RV64 {
// Copies the configuration, creates a code block of conf.code_cache_size
// bytes, points the assembler at that block, and emits the prelude into it.
A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
: conf(conf)
, cb(conf.code_cache_size)
, as(cb.ptr<u8*>(), conf.code_cache_size) {
EmitPrelude();
}
// Translates the guest code at the given location into an IR block and runs
// the optimization passes over it. The polyfill and verification passes always
// run; the get/set-elimination and constant-propagation groups run only when
// the matching optimization flag is enabled in the configuration.
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
Optimization::PolyfillPass(ir_block, {});
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
Optimization::VerificationPass(ir_block);
return ir_block;
}
// Looks up the entry point previously emitted for this location.
// Returns nullptr when the location has not been compiled.
CodePtr A32AddressSpace::Get(IR::LocationDescriptor descriptor) {
    const auto found = block_entries.find(descriptor.Value());
    if (found == block_entries.end()) {
        return nullptr;
    }
    return found->second;
}
// Returns the entry point for this location, compiling it first if needed.
// A freshly compiled block is recorded in both block_infos and block_entries.
CodePtr A32AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
    const CodePtr cached_entry = Get(descriptor);
    if (cached_entry) {
        return cached_entry;
    }

    const EmittedBlockInfo emitted = Emit(GenerateIR(descriptor));
    block_infos.insert_or_assign(descriptor.Value(), emitted);
    block_entries.insert_or_assign(descriptor.Value(), emitted.entry_point);
    return emitted.entry_point;
}
// Forgets every compiled block and rewinds the assembler to just past the
// prelude, so the prelude itself is kept.
void A32AddressSpace::ClearCache() {
    block_infos.clear();
    block_entries.clear();
    SetCursorPtr(prelude_info.end_of_prelude);
}
// Emits the entry and exit trampolines at the start of the code buffer and
// records their addresses in prelude_info.
// run_code reserves a stack frame of 64 * 8 bytes plus sizeof(StackLayout),
// stores every GPR except x0, sp and tp and every FPR above the StackLayout
// area, moves a1 into Xstate and a2 into Xhalt, and jumps to the address in a0.
// return_from_run_code reloads the same registers, releases the frame and
// jumps through ra.
void A32AddressSpace::EmitPrelude() {
using namespace biscuit;
prelude_info.run_code = GetCursorPtr<PreludeInfo::RunCodeFuncType>();
// TODO: Minimize this.
as.ADDI(sp, sp, -(64 * 8 + static_cast<int32_t>(sizeof(StackLayout))));
// GPR slots: index i is stored at offset i * 8 above the StackLayout area.
for (u32 i = 1; i < 32; i += 1) {
if (GPR{i} == sp || GPR{i} == tp)
continue;
as.SD(GPR{i}, i * 8 + static_cast<int32_t>(sizeof(StackLayout)), sp);
}
// FPR slots follow the 32 GPR slots.
for (u32 i = 0; i < 32; i += 1) {
as.FSD(FPR{i}, (32 + i) * 8 + static_cast<int32_t>(sizeof(StackLayout)), sp);
}
as.MV(Xstate, a1);
as.MV(Xhalt, a2);
as.JR(a0);
prelude_info.return_from_run_code = GetCursorPtr<CodePtr>();
for (u32 i = 1; i < 32; i += 1) {
if (GPR{i} == sp || GPR{i} == tp)
continue;
as.LD(GPR{i}, i * 8 + static_cast<int32_t>(sizeof(StackLayout)), sp);
}
for (u32 i = 0; i < 32; i += 1) {
as.FLD(FPR{i}, (32 + i) * 8 + static_cast<int32_t>(sizeof(StackLayout)), sp);
}
as.ADDI(sp, sp, (64 * 8 + static_cast<int32_t>(sizeof(StackLayout))));
as.JALR(ra);
// ClearCache rewinds the cursor to this point.
prelude_info.end_of_prelude = GetCursorPtr<CodePtr>();
}
// Moves the assembler cursor to ptr, expressed as an offset from the start of
// the buffer. Asserts that ptr does not lie before the buffer start.
void A32AddressSpace::SetCursorPtr(CodePtr ptr) {
ptrdiff_t offset = ptr - GetMemPtr<CodePtr>();
ASSERT(offset >= 0);
as.RewindBuffer(offset);
}
// Bytes left in the code cache: configured size minus the bytes between the
// buffer start and the current cursor.
size_t A32AddressSpace::GetRemainingSize() {
return conf.code_cache_size - (GetCursorPtr<sptr>() - GetMemPtr<sptr>());
}
// Compiles one IR block to RISC-V code and resolves its relocations.
// If less than 1 MiB of cache remains, the whole cache is cleared first.
EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
if (GetRemainingSize() < 1024 * 1024) {
ClearCache();
}
EmittedBlockInfo block_info = EmitRV64(as, std::move(block), {
.enable_cycle_counting = conf.enable_cycle_counting,
.always_little_endian = conf.always_little_endian,
});
Link(block_info);
return block_info;
}
// Patches each relocation recorded for the block. A temporary 4-byte assembler
// is placed at the relocation site (entry_point + ptr_offset) and a jump to
// the relocation's target is emitted there.
void A32AddressSpace::Link(EmittedBlockInfo& block_info) {
using namespace biscuit;
for (auto [ptr_offset, target] : block_info.relocations) {
Assembler a(reinterpret_cast<u8*>(block_info.entry_point) + ptr_offset, 4);
switch (target) {
case LinkTarget::ReturnFromRunCode: {
// Jump offset from the relocation site to the prelude's return path.
std::ptrdiff_t off = prelude_info.return_from_run_code - reinterpret_cast<CodePtr>(a.GetCursorPointer());
a.J(off);
break;
}
default:
ASSERT_FALSE("Invalid relocation target");
}
}
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,86 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <biscuit/assembler.hpp>
#include <ankerl/unordered_dense.h>
#include "dynarmic/backend/riscv64/code_block.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/interface/halt_reason.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::Backend::RV64 {
struct A32JitState;
// Owns the executable code cache for the RISC-V A32 backend: the code buffer,
// the assembler writing into it, and the maps from location descriptor value
// to emitted block.
class A32AddressSpace final {
public:
explicit A32AddressSpace(const A32::UserConfig& conf);
// Translates and optimizes the guest code at the given location.
IR::Block GenerateIR(IR::LocationDescriptor) const;
// Returns the cached entry point for the location, or nullptr.
CodePtr Get(IR::LocationDescriptor descriptor);
// Returns the entry point for the location, compiling it if necessary.
CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
// Drops all compiled blocks but keeps the prelude.
void ClearCache();
private:
// A32Core reads prelude_info.run_code directly.
friend class A32Core;
void EmitPrelude();
// Start of the assembler buffer, reinterpreted as T (a pointer type, uptr or sptr).
template<typename T>
T GetMemPtr() {
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
return reinterpret_cast<T>(as.GetBufferPointer(0));
}
template<typename T>
T GetMemPtr() const {
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
return reinterpret_cast<const T>(as.GetBufferPointer(0));
}
// Current assembler write position, reinterpreted as T.
template<typename T>
T GetCursorPtr() {
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
return reinterpret_cast<T>(as.GetCursorPointer());
}
template<typename T>
T GetCursorPtr() const {
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
return reinterpret_cast<const T>(as.GetCursorPointer());
}
void SetCursorPtr(CodePtr ptr);
size_t GetRemainingSize();
EmittedBlockInfo Emit(IR::Block ir_block);
void Link(EmittedBlockInfo& block);
const A32::UserConfig conf;
// Declared before `as` because the assembler is constructed from cb's memory.
CodeBlock cb;
biscuit::Assembler as;
// Both maps are keyed by IR::LocationDescriptor::Value().
ankerl::unordered_dense::map<u64, CodePtr> block_entries;
ankerl::unordered_dense::map<u64, EmittedBlockInfo> block_infos;
// Addresses recorded by EmitPrelude.
struct PreludeInfo {
CodePtr end_of_prelude;
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A32JitState* context, volatile u32* halt_reason);
RunCodeFuncType run_code;
CodePtr return_from_run_code;
} prelude_info;
};
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,24 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/riscv64/a32_address_space.h"
#include "dynarmic/backend/riscv64/a32_jitstate.h"
namespace Dynarmic::Backend::RV64 {
// Stateless driver that enters generated code for the current guest location.
class A32Core final {
public:
    // The configuration is accepted for interface symmetry but not used.
    explicit A32Core(const A32::UserConfig&) {}

    // Finds (or compiles) the block for thread_ctx's current location and
    // enters it through the address space's run_code trampoline, returning
    // whatever halt reason the trampoline reports.
    HaltReason Run(A32AddressSpace& process, A32JitState& thread_ctx, volatile u32* halt_reason) {
        const auto entry_point = process.GetOrEmit(thread_ctx.GetLocationDescriptor());
        const auto run_code = process.prelude_info.run_code;
        return run_code(entry_point, &thread_ctx, halt_reason);
    }
};
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,220 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <memory>
#include <mutex>
#include <boost/icl/interval_set.hpp>
#include "dynarmic/common/assert.h"
#include <mcl/scope_exit.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/backend/riscv64/a32_address_space.h"
#include "dynarmic/backend/riscv64/a32_core.h"
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/common/atomic.h"
#include "dynarmic/interface/A32/a32.h"
namespace Dynarmic::A32 {
using namespace Backend::RV64;
// Private implementation of the A32 Jit for the RISC-V backend. Holds the
// guest register state, the code cache and the pending halt/invalidation
// requests.
struct Jit::Impl final {
    Impl(Jit* jit_interface, A32::UserConfig conf)
            : jit_interface(jit_interface)
            , conf(conf)
            , current_address_space(conf)
            , core(conf) {}

    // Runs guest code until the generated code returns a halt reason.
    // Must not be called re-entrantly; is_executing is reset on every exit path.
    HaltReason Run() {
        ASSERT(!jit_interface->is_executing);
        jit_interface->is_executing = true;
        SCOPE_EXIT {
            jit_interface->is_executing = false;
        };

        HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
        RequestCacheInvalidation();
        return hr;
    }

    // Single-stepping is not implemented for this backend.
    HaltReason Step() {
        ASSERT(!jit_interface->is_executing);
        jit_interface->is_executing = true;
        SCOPE_EXIT {
            jit_interface->is_executing = false;
        };

        UNIMPLEMENTED();
        RequestCacheInvalidation();
        return HaltReason{};
    }

    // Requests that the whole code cache be invalidated and halts execution.
    void ClearCache() {
        std::unique_lock lock{invalidation_mutex};
        invalidate_entire_cache = true;
        HaltExecution(HaltReason::CacheInvalidation);
    }

    // Records [start_address, start_address + length - 1] as needing
    // invalidation and halts execution.
    void InvalidateCacheRange(u32 start_address, size_t length) {
        std::unique_lock lock{invalidation_mutex};
        invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
        HaltExecution(HaltReason::CacheInvalidation);
    }

    // Resets the guest register state to its default-constructed value.
    void Reset() {
        current_state = {};
    }

    // Sets the bits of hr in halt_reason. Only the requested bits may be set:
    // ORing in the complement would raise every other halt reason instead.
    void HaltExecution(HaltReason hr) {
        Atomic::Or(&halt_reason, static_cast<u32>(hr));
    }

    // Clears the bits of hr in halt_reason, leaving the others untouched.
    void ClearHalt(HaltReason hr) {
        Atomic::And(&halt_reason, ~static_cast<u32>(hr));
    }

    std::array<u32, 16>& Regs() {
        return current_state.regs;
    }

    const std::array<u32, 16>& Regs() const {
        return current_state.regs;
    }

    std::array<u32, 64>& ExtRegs() {
        return current_state.ext_regs;
    }

    const std::array<u32, 64>& ExtRegs() const {
        return current_state.ext_regs;
    }

    u32 Cpsr() const {
        return current_state.Cpsr();
    }

    void SetCpsr(u32 value) {
        current_state.SetCpsr(value);
    }

    u32 Fpscr() const {
        return current_state.Fpscr();
    }

    void SetFpscr(u32 value) {
        current_state.SetFpscr(value);
    }

    void ClearExclusiveState() {
        current_state.exclusive_state = false;
    }

    void DumpDisassembly() const {
        UNIMPLEMENTED();
    }

private:
    // Placeholder: discards pending invalidation requests without acting on them.
    void RequestCacheInvalidation() {
        // ASSERT_FALSE("Unimplemented");
        invalidate_entire_cache = false;
        invalid_cache_ranges.clear();
    }

    Jit* jit_interface;
    A32::UserConfig conf;
    A32JitState current_state{};
    A32AddressSpace current_address_space;
    A32Core core;
    // Bitmask of pending HaltReason values; its address is passed to generated code.
    volatile u32 halt_reason = 0;
    // Guards invalid_cache_ranges and invalidate_entire_cache in the request methods.
    std::mutex invalidation_mutex;
    boost::icl::interval_set<u32> invalid_cache_ranges;
    bool invalidate_entire_cache = false;
};
// Public A32::Jit interface. Every method below forwards to the matching
// method on the private Impl.
Jit::Jit(UserConfig conf)
: impl(std::make_unique<Impl>(this, conf)) {}
Jit::~Jit() = default;
HaltReason Jit::Run() {
return impl->Run();
}
HaltReason Jit::Step() {
return impl->Step();
}
void Jit::ClearCache() {
impl->ClearCache();
}
void Jit::InvalidateCacheRange(u32 start_address, std::size_t length) {
impl->InvalidateCacheRange(start_address, length);
}
void Jit::Reset() {
impl->Reset();
}
void Jit::HaltExecution(HaltReason hr) {
impl->HaltExecution(hr);
}
void Jit::ClearHalt(HaltReason hr) {
impl->ClearHalt(hr);
}
std::array<u32, 16>& Jit::Regs() {
return impl->Regs();
}
const std::array<u32, 16>& Jit::Regs() const {
return impl->Regs();
}
std::array<u32, 64>& Jit::ExtRegs() {
return impl->ExtRegs();
}
const std::array<u32, 64>& Jit::ExtRegs() const {
return impl->ExtRegs();
}
u32 Jit::Cpsr() const {
return impl->Cpsr();
}
void Jit::SetCpsr(u32 value) {
impl->SetCpsr(value);
}
u32 Jit::Fpscr() const {
return impl->Fpscr();
}
void Jit::SetFpscr(u32 value) {
impl->SetFpscr(value);
}
void Jit::ClearExclusiveState() {
impl->ClearExclusiveState();
}
void Jit::DumpDisassembly() const {
impl->DumpDisassembly();
}
} // namespace Dynarmic::A32

View file

@ -0,0 +1,76 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include <mcl/bit/bit_field.hpp>
#include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::RV64 {
// Reassembles the CPSR value from the separately stored fields.
// cpsr_nzcv, cpsr_q and cpsr_jaifm are ORed in unchanged. Each GE bit is taken
// from the top bit of one byte of cpsr_ge. E, T and the IT state come from the
// low 16 bits of upper_location_descriptor.
u32 A32JitState::Cpsr() const {
u32 cpsr = 0;
// NZCV flags
cpsr |= cpsr_nzcv;
// Q flag
cpsr |= cpsr_q;
// GE flags
cpsr |= mcl::bit::get_bit<31>(cpsr_ge) ? 1 << 19 : 0;
cpsr |= mcl::bit::get_bit<23>(cpsr_ge) ? 1 << 18 : 0;
cpsr |= mcl::bit::get_bit<15>(cpsr_ge) ? 1 << 17 : 0;
cpsr |= mcl::bit::get_bit<7>(cpsr_ge) ? 1 << 16 : 0;
// E flag, T flag
cpsr |= mcl::bit::get_bit<1>(upper_location_descriptor) ? 1 << 9 : 0;
cpsr |= mcl::bit::get_bit<0>(upper_location_descriptor) ? 1 << 5 : 0;
// IT state
// Descriptor bits 10-15 map to the same CPSR bits; descriptor bits 8-9 move up to CPSR bits 25-26.
cpsr |= static_cast<u32>(upper_location_descriptor & 0b11111100'00000000);
cpsr |= static_cast<u32>(upper_location_descriptor & 0b00000011'00000000) << 17;
// Other flags
cpsr |= cpsr_jaifm;
return cpsr;
}
// Splits a CPSR value into the separately stored fields; the inverse of Cpsr().
// Each GE bit is expanded to a full byte of cpsr_ge. The low 16 bits of
// upper_location_descriptor are rebuilt from E, T and the IT state, while its
// high 16 bits are preserved.
void A32JitState::SetCpsr(u32 cpsr) {
// NZCV flags
cpsr_nzcv = cpsr & 0xF0000000;
// Q flag
cpsr_q = cpsr & (1 << 27);
// GE flags
cpsr_ge = 0;
cpsr_ge |= mcl::bit::get_bit<19>(cpsr) ? 0xFF000000 : 0;
cpsr_ge |= mcl::bit::get_bit<18>(cpsr) ? 0x00FF0000 : 0;
cpsr_ge |= mcl::bit::get_bit<17>(cpsr) ? 0x0000FF00 : 0;
cpsr_ge |= mcl::bit::get_bit<16>(cpsr) ? 0x000000FF : 0;
// Clear the CPSR-derived half of the descriptor before refilling it.
upper_location_descriptor &= 0xFFFF0000;
// E flag, T flag
upper_location_descriptor |= mcl::bit::get_bit<9>(cpsr) ? 2 : 0;
upper_location_descriptor |= mcl::bit::get_bit<5>(cpsr) ? 1 : 0;
// IT state
upper_location_descriptor |= (cpsr >> 0) & 0b11111100'00000000;
upper_location_descriptor |= (cpsr >> 17) & 0b00000011'00000000;
// Other flags
cpsr_jaifm = cpsr & 0x010001DF;
}
// FPSCR bits kept in the high half of upper_location_descriptor.
constexpr u32 FPCR_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
// FPSCR bits kept in fpsr.
constexpr u32 FPSR_MASK = 0xF800009F;
// Rebuilds FPSCR from the high 16 bits of upper_location_descriptor and fpsr.
// NOTE(review): fpsr_nzcv is neither read here nor written by SetFpscr;
// confirm whether generated code is expected to use it.
u32 A32JitState::Fpscr() const {
return (upper_location_descriptor & 0xffff0000) | fpsr;
}
// Splits an FPSCR value: the FPSR_MASK bits go to fpsr, the FPCR_MASK bits
// replace the high half of upper_location_descriptor, and the low half of the
// descriptor is preserved.
void A32JitState::SetFpscr(u32 fpscr) {
fpsr = fpscr & FPSR_MASK;
upper_location_descriptor = (upper_location_descriptor & 0x0000ffff) | (fpscr & FPCR_MASK);
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/ir/location_descriptor.h"
namespace Dynarmic::Backend::RV64 {
// Guest CPU state for the RISC-V A32 backend. A pointer to this struct is
// handed to generated code, so member order and alignment are left unchanged.
struct A32JitState {
    // CPSR, stored split into fields; see Cpsr() / SetCpsr().
    u32 cpsr_nzcv = 0;
    u32 cpsr_q = 0;
    u32 cpsr_jaifm = 0;
    u32 cpsr_ge = 0;

    // FPSCR status bits; see Fpscr() / SetFpscr().
    u32 fpsr = 0;
    u32 fpsr_nzcv = 0;

    // General-purpose registers; regs[15] is the program counter used by
    // GetLocationDescriptor().
    std::array<u32, 16> regs{};

    // Low 16 bits hold CPSR-derived state (E, T, IT); high 16 bits hold the
    // FPSCR mode bits. Zero-initialised so that a default-initialised state
    // never exposes an indeterminate value through GetLocationDescriptor(),
    // Cpsr() or Fpscr().
    u32 upper_location_descriptor = 0;

    alignas(16) std::array<u32, 64> ext_regs{};

    u32 exclusive_state = 0;

    u32 Cpsr() const;
    void SetCpsr(u32 cpsr);

    u32 Fpscr() const;
    void SetFpscr(u32 fpscr);

    // Builds the 64-bit location descriptor: PC in the low half,
    // upper_location_descriptor in the high half.
    IR::LocationDescriptor GetLocationDescriptor() const {
        return IR::LocationDescriptor{regs[15] | (static_cast<u64>(upper_location_descriptor) << 32)};
    }
};
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,20 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <biscuit/registers.hpp>
namespace Dynarmic::Backend::RV64 {
// Fixed register assignments used by generated code: the prelude loads the
// jit-state pointer into Xstate (x27) and the halt-reason pointer into Xhalt (x26).
constexpr biscuit::GPR Xstate{27};
constexpr biscuit::GPR Xhalt{26};
// Scratch registers x30 and x31.
constexpr biscuit::GPR Xscratch0{30}, Xscratch1{31};
// Register index lists for general-purpose and floating-point registers.
// NOTE(review): presumably the preference order used by the register allocator — confirm against reg_alloc.
constexpr std::initializer_list<u32> GPR_ORDER{8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 5, 6, 7, 28, 29, 10, 11, 12, 13, 14, 15, 16, 17};
constexpr std::initializer_list<u32> FPR_ORDER{8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstdint>
#include <new>
#include <sys/mman.h>
namespace Dynarmic::Backend::RV64 {
class CodeBlock {
public:
explicit CodeBlock(std::size_t size) noexcept : memsize(size) {
mem = (u8*)mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
if (mem == nullptr)
ASSERT_FALSE("out of memory");
}
~CodeBlock() noexcept {
if (mem == nullptr)
return;
munmap(mem, memsize);
}
template<typename T>
T ptr() const noexcept {
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
return reinterpret_cast<T>(mem);
}
protected:
u8* mem = nullptr;
size_t memsize = 0;
};
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,26 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
namespace Dynarmic::IR {
class Block;
} // namespace Dynarmic::IR
namespace Dynarmic::Backend::RV64 {
struct EmitConfig;
// Bundle of references threaded through every EmitIR<> specialisation while one
// IR block is being translated. It owns nothing; all members refer to objects
// created by EmitRV64.
struct EmitContext {
// The IR block currently being emitted.
IR::Block& block;
// Register allocator for this block.
RegAlloc& reg_alloc;
// Emission options (cycle counting, endianness handling).
const EmitConfig& emit_conf;
// Output record: entry point, size and relocations of the emitted code.
EmittedBlockInfo& ebi;
};
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,174 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include <bit>
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include <mcl/bit/bit_field.hpp>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// Void produces no value and emits no code.
template<>
void EmitIR<IR::Opcode::Void>(biscuit::Assembler&, EmitContext&, IR::Inst*) {}
// Identity emits no code: the result is defined as the already-existing argument value.
template<>
void EmitIR<IR::Opcode::Identity>(biscuit::Assembler&, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
}
// The opcodes below are not yet implemented for the RV64 backend;
// each emitter only invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::Breakpoint>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::CallHostFunction>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PushRSB>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// GetCarryFromOp emits no code of its own; it only asserts that the value is already live.
// NOTE(review): presumably the emitter of the producing op defines it - confirm.
template<>
void EmitIR<IR::Opcode::GetCarryFromOp>(biscuit::Assembler&, EmitContext& ctx, IR::Inst* inst) {
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(ctx.reg_alloc.IsValueLive(inst));
}
// Not yet implemented for the RV64 backend.
template<>
void EmitIR<IR::Opcode::GetOverflowFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Not yet implemented for the RV64 backend.
template<>
void EmitIR<IR::Opcode::GetGEFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// GetNZCVFromOp emits no code of its own; it only asserts that the value is already live.
template<>
void EmitIR<IR::Opcode::GetNZCVFromOp>(biscuit::Assembler&, EmitContext& ctx, IR::Inst* inst) {
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(ctx.reg_alloc.IsValueLive(inst));
}
// Computes the N and Z flags of a value and places them at their NZCV positions:
// Z in bit 30, N in bit 31. Clobbers Xscratch0.
template<>
void EmitIR<IR::Opcode::GetNZFromOp>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
auto Xnz = ctx.reg_alloc.WriteX(inst);
RegAlloc::Realize(Xvalue, Xnz);
// Z: 1 if the value is zero, shifted into bit 30.
as.SEQZ(Xnz, Xvalue);
as.SLLI(Xnz, Xnz, 30);
// N: 1 if the full 64-bit register is negative, shifted into bit 31.
// NOTE(review): for 32-bit operands this relies on the value being sign-extended - confirm.
as.SLTZ(Xscratch0, Xvalue);
as.SLLI(Xscratch0, Xscratch0, 31);
as.OR(Xnz, Xnz, Xscratch0);
}
// Not yet implemented for the RV64 backend.
template<>
void EmitIR<IR::Opcode::GetUpperFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Not yet implemented for the RV64 backend.
template<>
void EmitIR<IR::Opcode::GetLowerFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Isolates the C flag from an NZCV value. The result keeps the flag in place
// (bit 29) rather than shifting it down to bit 0. Clobbers Xscratch0.
template<>
void EmitIR<IR::Opcode::GetCFlagFromNZCV>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xc = ctx.reg_alloc.WriteX(inst);
auto Xnzcv = ctx.reg_alloc.ReadX(args[0]);
RegAlloc::Realize(Xc, Xnzcv);
// LUI places its immediate in bits 31..12, so 0x20000 yields the mask 0x20000000 (bit 29).
as.LUI(Xscratch0, 0x20000);
as.AND(Xc, Xnzcv, Xscratch0);
}
// Not yet implemented for the RV64 backend.
template<>
void EmitIR<IR::Opcode::NZCVFromPackedFlags>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Translates one IR block into RISC-V code at the assembler's current cursor.
//
// Steps: emit every IR instruction in order, verify the register allocator has no
// outstanding uses, optionally subtract the block's cycle count from
// StackLayout::cycles_remaining, then emit the block terminal.
//
// Returns the entry point, byte size and pending relocations of the emitted code.
EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitConfig& emit_conf) {
using namespace biscuit;
EmittedBlockInfo ebi;
RegAlloc reg_alloc{as, GPR_ORDER, FPR_ORDER};
EmitContext ctx{block, reg_alloc, emit_conf, ebi};
ebi.entry_point = reinterpret_cast<CodePtr>(as.GetCursorPointer());
for (auto iter = block.begin(); iter != block.end(); ++iter) {
IR::Inst* inst = &*iter;
// opcodes.inc expands the three macros below into one case per opcode,
// each dispatching to the matching EmitIR<> specialisation.
switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...) \
case IR::Opcode::name: \
EmitIR<IR::Opcode::name>(as, ctx, inst); \
break;
#define A32OPC(name, type, ...) \
case IR::Opcode::A32##name: \
EmitIR<IR::Opcode::A32##name>(as, ctx, inst); \
break;
#define A64OPC(name, type, ...) \
case IR::Opcode::A64##name: \
EmitIR<IR::Opcode::A64##name>(as, ctx, inst); \
break;
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
default:
ASSERT_FALSE("Invalid opcode: {}", inst->GetOpcode());
break;
}
}
reg_alloc.UpdateAllUses();
reg_alloc.AssertNoMoreUses();
if (emit_conf.enable_cycle_counting) {
const size_t cycles_to_add = block.CycleCount();
// cycles_remaining lives in the StackLayout, addressed relative to sp.
as.LD(Xscratch0, offsetof(StackLayout, cycles_remaining), sp);
// Use a single ADDI when the negated count survives a 12-bit sign-extension
// round trip; otherwise materialise the count and subtract it.
if (mcl::bit::sign_extend<12>(-cycles_to_add) == -cycles_to_add) {
as.ADDI(Xscratch0, Xscratch0, -cycles_to_add);
} else {
as.LI(Xscratch1, cycles_to_add);
as.SUB(Xscratch0, Xscratch0, Xscratch1);
}
as.SD(Xscratch0, offsetof(StackLayout, cycles_remaining), sp);
}
EmitA32Terminal(as, ctx);
ebi.size = reinterpret_cast<CodePtr>(as.GetCursorPointer()) - ebi.entry_point;
return ebi;
}
/// Records a relocation for `link_target` at the current cursor position (as an
/// offset from the block's entry point) and emits a NOP as its placeholder.
void EmitRelocation(biscuit::Assembler& as, EmitContext& ctx, LinkTarget link_target) {
    const auto cursor = reinterpret_cast<CodePtr>(as.GetCursorPointer());
    const std::ptrdiff_t offset_in_block = cursor - ctx.ebi.entry_point;

    ctx.ebi.relocations.emplace_back(Relocation{offset_in_block, link_target});
    as.NOP();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,61 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <vector>
#include <biscuit/label.hpp>
#include "dynarmic/common/common_types.h"
namespace biscuit {
class Assembler;
} // namespace biscuit
namespace Dynarmic::IR {
class Block;
class Inst;
enum class Cond;
enum class Opcode;
} // namespace Dynarmic::IR
namespace Dynarmic::Backend::RV64 {
// Pointer into emitted machine code.
using CodePtr = std::byte*;
// Destinations a relocation placeholder can be linked to.
enum class LinkTarget {
ReturnFromRunCode,
};
// A placeholder inside an emitted block that still has to be linked to `target`.
struct Relocation {
// Byte offset of the placeholder from the block's entry point.
std::ptrdiff_t code_offset;
LinkTarget target;
};
// Result of emitting one IR block.
struct EmittedBlockInfo {
// Address of the first emitted instruction.
CodePtr entry_point;
// Number of bytes emitted for the block.
size_t size;
// Placeholders recorded by EmitRelocation.
std::vector<Relocation> relocations;
};
// Options controlling code emission.
struct EmitConfig {
// When true, EmitRV64 subtracts each block's cycle count from the remaining-cycles counter.
bool enable_cycle_counting;
// When true, bit 1 of the upper location descriptor is masked off when terminals update it.
bool always_little_endian;
};
struct EmitContext;
// Emits a whole IR block; see the definition for the sequence of steps.
EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitConfig& emit_conf);
// Per-opcode emitter; explicitly specialised for every IR opcode.
template<IR::Opcode op>
void EmitIR(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
// Records a relocation at the current cursor and emits its placeholder instruction.
void EmitRelocation(biscuit::Assembler& as, EmitContext& ctx, LinkTarget link_target);
// Emits a branch to `label` that is taken when the A32 condition `cond` holds.
void EmitA32Cond(biscuit::Assembler& as, EmitContext& ctx, IR::Cond cond, biscuit::Label* label);
// Emits the terminal of the block held by `ctx`.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx);
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,401 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// Emits a conditional branch to `label` that is taken when the A32 condition `cond`
// holds for the flags stored in A32JitState::cpsr_nzcv; otherwise execution falls through.
//
// The flags are loaded and shifted right by 28, so inside the switch:
// N = bit 3, Z = bit 2, C = bit 1, V = bit 0.
// Clobbers Xscratch0, and Xscratch1 for the multi-flag conditions.
// Conditions without a case below reach the assertion in `default`.
void EmitA32Cond(biscuit::Assembler& as, EmitContext&, IR::Cond cond, biscuit::Label* label) {
as.LWU(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
as.SRLIW(Xscratch0, Xscratch0, 28);
switch (cond) {
case IR::Cond::EQ:
// Z == 1
as.ANDI(Xscratch0, Xscratch0, 0b0100);
as.BNEZ(Xscratch0, label);
break;
case IR::Cond::NE:
// Z == 0
as.ANDI(Xscratch0, Xscratch0, 0b0100);
as.BEQZ(Xscratch0, label);
break;
case IR::Cond::CS:
// C == 1
as.ANDI(Xscratch0, Xscratch0, 0b0010);
as.BNEZ(Xscratch0, label);
break;
case IR::Cond::CC:
// C == 0
as.ANDI(Xscratch0, Xscratch0, 0b0010);
as.BEQZ(Xscratch0, label);
break;
case IR::Cond::MI:
// N == 1
as.ANDI(Xscratch0, Xscratch0, 0b1000);
as.BNEZ(Xscratch0, label);
break;
case IR::Cond::PL:
// N == 0
as.ANDI(Xscratch0, Xscratch0, 0b1000);
as.BEQZ(Xscratch0, label);
break;
case IR::Cond::VS:
// V == 1
as.ANDI(Xscratch0, Xscratch0, 0b0001);
as.BNEZ(Xscratch0, label);
break;
case IR::Cond::VC:
// V == 0
as.ANDI(Xscratch0, Xscratch0, 0b0001);
as.BEQZ(Xscratch0, label);
break;
case IR::Cond::HI:
// Z == 0 && C == 1
// Keep only Z and C; the condition holds exactly when the result is C alone.
as.ANDI(Xscratch0, Xscratch0, 0b0110);
as.ADDI(Xscratch1, biscuit::zero, 0b0010);
as.BEQ(Xscratch0, Xscratch1, label);
break;
case IR::Cond::LS:
// Z == 1 || C == 0
// Exact negation of HI.
as.ANDI(Xscratch0, Xscratch0, 0b0110);
as.ADDI(Xscratch1, biscuit::zero, 0b0010);
as.BNE(Xscratch0, Xscratch1, label);
break;
case IR::Cond::GE:
// N == V
// Taken when both N and V are set, or both are clear.
as.ANDI(Xscratch0, Xscratch0, 0b1001);
as.ADDI(Xscratch1, biscuit::zero, 0b1001);
as.BEQ(Xscratch0, Xscratch1, label);
as.BEQZ(Xscratch0, label);
break;
case IR::Cond::LT:
// N != V
// Taken when exactly one of N and V is set.
as.ANDI(Xscratch0, Xscratch0, 0b1001);
as.ADDI(Xscratch1, biscuit::zero, 0b1000);
as.BEQ(Xscratch0, Xscratch1, label);
as.ADDI(Xscratch1, biscuit::zero, 0b0001);
as.BEQ(Xscratch0, Xscratch1, label);
break;
case IR::Cond::GT:
// Z == 0 && N == V
// With Z included in the mask, only N=V=1,Z=0 or N=V=Z=0 can match.
as.ANDI(Xscratch0, Xscratch0, 0b1101);
as.ADDI(Xscratch1, biscuit::zero, 0b1001);
as.BEQ(Xscratch0, Xscratch1, label);
as.BEQZ(Xscratch0, label);
break;
case IR::Cond::LE:
// Z == 1 || N != V
// Lookup table: bit i of the constant is 1 iff the masked flag value i satisfies LE
// (set for i = 1, 4, 5, 8, 12, 13). Shift the table right by the flag value and test bit 0.
as.ANDI(Xscratch0, Xscratch0, 0b1101);
as.LI(Xscratch1, 0b11000100110010);
as.SRLW(Xscratch0, Xscratch1, Xscratch0);
as.ANDI(Xscratch0, Xscratch0, 1);
as.BNEZ(Xscratch0, label);
break;
default:
ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
break;
}
}
// Forward declaration of the variant-dispatching overload, so the composite terminals
// (If, CheckBit, CheckHalt) can recurse into their nested terminals.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
// Interpret terminals are never expected here; reaching this overload is an error.
void EmitA32Terminal(biscuit::Assembler&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) {
ASSERT_FALSE("Interpret should never be emitted.");
}
// ReturnToDispatch: emit a relocation placeholder targeting ReturnFromRunCode.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
}
/// Emits a store that updates A32JitState::upper_location_descriptor when moving from
/// `old_location` to `new_location`; nothing is emitted if the upper halves already agree.
/// Single-stepping is ignored in both descriptors, and bit 1 of the new value is
/// cleared when the configuration forces little-endian. Clobbers Xscratch0.
void EmitSetUpperLocationDescriptor(biscuit::Assembler& as, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
    const auto upper_half_of = [](const IR::LocationDescriptor& desc) -> u32 {
        const auto without_single_step = A32::LocationDescriptor{desc}.SetSingleStepping(false);
        return static_cast<u32>(without_single_step.UniqueHash() >> 32);
    };

    const u32 previous_upper = upper_half_of(old_location);

    const u32 endian_mask = ~u32(ctx.emit_conf.always_little_endian ? 0x2 : 0);
    const u32 next_upper = upper_half_of(new_location) & endian_mask;

    if (previous_upper == next_upper) {
        return;
    }

    as.LI(Xscratch0, next_upper);
    as.SW(Xscratch0, offsetof(A32JitState, upper_location_descriptor), Xstate);
}
// LinkBlock: update the upper location descriptor if it changes, store the low 32 bits
// of the next location into regs[15], then return to the dispatcher through a
// relocation. Clobbers Xscratch0.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool) {
EmitSetUpperLocationDescriptor(as, ctx, terminal.next, initial_location);
// SW writes only the low 32 bits of the loaded descriptor value.
as.LI(Xscratch0, terminal.next.Value());
as.SW(Xscratch0, offsetof(A32JitState, regs) + sizeof(u32) * 15, Xstate);
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
// TODO: Implement LinkBlock optimization
}
// LinkBlockFast: currently emits exactly the same sequence as LinkBlock.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool) {
EmitSetUpperLocationDescriptor(as, ctx, terminal.next, initial_location);
// SW writes only the low 32 bits of the loaded descriptor value.
as.LI(Xscratch0, terminal.next.Value());
as.SW(Xscratch0, offsetof(A32JitState, regs) + sizeof(u32) * 15, Xstate);
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
// TODO: Implement LinkBlockFast optimization
}
// PopRSBHint: no hint handling yet; simply return to the dispatcher through a relocation.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
// TODO: Implement PopRSBHint optimization
}
// FastDispatchHint: no hint handling yet; simply return to the dispatcher through a relocation.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
// TODO: Implement FastDispatchHint optimization
}
// If: emit a conditional branch to `pass`, then the else_ terminal on the fall-through
// path, then the then_ terminal at `pass`.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
biscuit::Label pass;
// The branch is taken when the condition holds.
EmitA32Cond(as, ctx, terminal.if_, &pass);
EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);
as.Bind(&pass);
EmitA32Terminal(as, ctx, terminal.then_, initial_location, is_single_step);
}
/// CheckBit: emit the then_ terminal if StackLayout::check_bit is non-zero, otherwise
/// the else_ terminal. Clobbers Xscratch0.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    biscuit::Label fail;

    // check_bit is a StackLayout field, so it must be addressed relative to the stack
    // pointer (as EmitRV64 does for cycles_remaining). Xstate is the base of
    // A32JitState, so a StackLayout offset applied to it reads an unrelated byte.
    as.LBU(Xscratch0, offsetof(StackLayout, check_bit), biscuit::sp);
    as.BEQZ(Xscratch0, &fail);

    EmitA32Terminal(as, ctx, terminal.then_, initial_location, is_single_step);

    as.Bind(&fail);
    EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);
}
// CheckHalt: load the 32-bit word at the address held in Xhalt; if it is non-zero,
// return to the dispatcher through a relocation, otherwise continue with the else_
// terminal. Clobbers Xscratch0.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
biscuit::Label fail;
as.LWU(Xscratch0, 0, Xhalt);
// Read/write fence emitted after the load of the halt word.
as.FENCE(biscuit::FenceOrder::RW, biscuit::FenceOrder::RW);
as.BNEZ(Xscratch0, &fail);
EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);
as.Bind(&fail);
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
}
// Dispatches on the alternative stored in the terminal variant and calls the
// matching overload above.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
boost::apply_visitor([&](const auto& t) { EmitA32Terminal(as, ctx, t, initial_location, is_single_step); }, terminal);
}
// Public entry point: emits the terminal of the block in `ctx`. The single-stepping
// flag is stripped from the initial location and passed separately.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx) {
const A32::LocationDescriptor location{ctx.block.Location()};
EmitA32Terminal(as, ctx, ctx.block.GetTerminal(), location.SetSingleStepping(false), location.SingleStepping());
}
// A32 register access emitters. Only A32GetRegister and A32SetRegister are implemented;
// the others only invoke UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::A32SetCheckBit>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Loads guest register `reg` from A32JitState::regs into the result register.
// LWU zero-extends the 32-bit value.
template<>
void EmitIR<IR::Opcode::A32GetRegister>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
auto Xresult = ctx.reg_alloc.WriteX(inst);
RegAlloc::Realize(Xresult);
as.LWU(Xresult, offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(reg), Xstate);
}
template<>
void EmitIR<IR::Opcode::A32GetExtendedRegister32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32GetExtendedRegister64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32GetVector>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Stores the low 32 bits of the second argument into guest register `reg`
// in A32JitState::regs.
template<>
void EmitIR<IR::Opcode::A32SetRegister>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xvalue = ctx.reg_alloc.ReadX(args[1]);
RegAlloc::Realize(Xvalue);
// TODO: Detect if Gpr vs Fpr is more appropriate
as.SW(Xvalue, offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(reg), Xstate);
}
template<>
void EmitIR<IR::Opcode::A32SetExtendedRegister32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetExtendedRegister64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetVector>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// A32 CPSR emitters. Only A32SetCpsrNZCV and A32SetCpsrNZC are implemented;
// the others only invoke UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::A32GetCpsr>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetCpsr>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Stores the low 32 bits of the argument directly into A32JitState::cpsr_nzcv.
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZCV>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto Xnzcv = ctx.reg_alloc.ReadX(args[0]);
RegAlloc::Realize(Xnzcv);
as.SW(Xnzcv, offsetof(A32JitState, cpsr_nzcv), Xstate);
}
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZCVRaw>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZCVQ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Replaces N, Z and C in cpsr_nzcv while preserving V. Both arguments are expected to
// already hold their flags at the NZCV bit positions, because they are OR-ed in unshifted.
// Immediate arguments are not supported yet. Clobbers Xscratch0 and Xscratch1.
template<>
void EmitIR<IR::Opcode::A32SetCpsrNZC>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// TODO: Add full implementation
ASSERT(!args[0].IsImmediate() && !args[1].IsImmediate());
auto Xnz = ctx.reg_alloc.ReadX(args[0]);
auto Xc = ctx.reg_alloc.ReadX(args[1]);
RegAlloc::Realize(Xnz, Xc);
as.LWU(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
// LUI 0x10000 yields the mask 0x10000000 (bit 28, the V flag); keep only V.
as.LUI(Xscratch1, 0x10000);
as.AND(Xscratch0, Xscratch0, Xscratch1);
as.OR(Xscratch0, Xscratch0, Xnz);
as.OR(Xscratch0, Xscratch0, Xc);
as.SW(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
}
// The remaining A32 opcodes (flags, branch-exchange, supervisor/exception calls, barriers
// and FPSCR access) are not yet implemented for the RV64 backend; each emitter only
// invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::A32GetCFlag>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32OrQFlag>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32GetGEFlags>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetGEFlags>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetGEFlagsCompressed>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32BXWritePC>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32UpdateUpperLocationDescriptor>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32CallSupervisor>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExceptionRaised>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32DataSynchronizationBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32DataMemoryBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32InstructionSynchronizationBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32GetFpscr>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetFpscr>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32GetFpscrNZCV>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32SetFpscrNZCV>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,55 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// A32 coprocessor opcodes. None are implemented for the RV64 backend yet;
// each emitter only invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::A32CoprocInternalOperation>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32CoprocSendOneWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32CoprocSendTwoWords>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32CoprocGetOneWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32CoprocGetTwoWords>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32CoprocLoadWords>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32CoprocStoreWords>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,105 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// A32 memory opcodes (plain and exclusive reads/writes of 8 to 64 bits, plus
// ClearExclusive). None are implemented for the RV64 backend yet; each emitter only
// invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::A32ClearExclusive>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ReadMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ReadMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ReadMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ReadMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExclusiveReadMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExclusiveReadMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExclusiveReadMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExclusiveReadMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32WriteMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32WriteMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32WriteMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32WriteMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,200 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// A64 state-access and system opcodes. None are implemented for the RV64 backend yet;
// each emitter only invokes UNIMPLEMENTED(). The specialisations still have to exist
// because the opcode switch in EmitRV64 references EmitIR<> for every opcode.
template<>
void EmitIR<IR::Opcode::A64SetCheckBit>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetCFlag>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetNZCVRaw>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetNZCVRaw>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetNZCV>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetW>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetX>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetS>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetD>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetQ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetSP>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetFPCR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetFPSR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetW>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetX>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetS>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetD>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetQ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetSP>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetFPCR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetFPSR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetPC>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64CallSupervisor>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExceptionRaised>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64DataCacheOperationRaised>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64InstructionCacheOperationRaised>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64DataMemoryBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetCNTFRQ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetCNTPCT>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetCTR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetDCZID>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetTPIDR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64GetTPIDRRO>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64SetTPIDR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,125 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// A64 memory opcodes (plain and exclusive reads/writes of 8 to 128 bits, plus
// ClearExclusive). None are implemented for the RV64 backend yet; each emitter only
// invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::A64ClearExclusive>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ReadMemory128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveReadMemory128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64WriteMemory128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,100 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// Placeholder emitters for the checksum and cryptography IR opcodes.
// No RV64 code is generated for any of them yet: every specialization ignores
// its arguments and only invokes UNIMPLEMENTED().

// CRC32 (Castagnoli polynomial variants).
template<>
void EmitIR<IR::Opcode::CRC32Castagnoli8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::CRC32Castagnoli16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::CRC32Castagnoli32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::CRC32Castagnoli64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// CRC32 (ISO polynomial variants).
template<>
void EmitIR<IR::Opcode::CRC32ISO8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::CRC32ISO16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::CRC32ISO32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::CRC32ISO64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// AES.
template<>
void EmitIR<IR::Opcode::AESDecryptSingleRound>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::AESEncryptSingleRound>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::AESInverseMixColumns>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::AESMixColumns>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// SM4.
template<>
void EmitIR<IR::Opcode::SM4AccessSubstitutionBox>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// SHA-256.
template<>
void EmitIR<IR::Opcode::SHA256Hash>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SHA256MessageSchedule0>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SHA256MessageSchedule1>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,572 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// Placeholder emitters for pack / extract / test / conditional-select IR
// opcodes. No RV64 code is generated for any of them yet: every specialization
// ignores its arguments and only invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::Pack2x32To1x64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::Pack2x64To1x128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::LeastSignificantWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::LeastSignificantHalf>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::LeastSignificantByte>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MostSignificantWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MostSignificantBit>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::IsZero32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::IsZero64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::TestBit>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ConditionalSelect32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ConditionalSelect64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ConditionalSelectNZCV>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Emits a 32-bit logical shift left together with its carry-out.
//
// IR arguments: args[0] = operand, args[1] = shift amount, args[2] = carry-in.
// Only the partial case is supported where the shift amount is an immediate
// and a GetCarryFromOp pseudo-operation is attached to `inst`; both conditions
// are asserted.
//
// Carry-out convention used here:
//   shift == 0       -> carry-in is passed through unchanged
//   0 < shift < 32   -> bit (32 - shift) of the operand (last bit shifted out)
//   shift == 32      -> bit 0 of the operand
//   shift > 32       -> 0
template<>
void EmitIR<IR::Opcode::LogicalShiftLeft32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
    const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto& operand_arg = args[0];
    auto& shift_arg = args[1];
    auto& carry_arg = args[2];

    // TODO: Add full implementation
    ASSERT(carry_inst != nullptr);
    ASSERT(shift_arg.IsImmediate());

    auto Xresult = ctx.reg_alloc.WriteX(inst);
    auto Xcarry_out = ctx.reg_alloc.WriteX(carry_inst);
    auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
    auto Xcarry_in = ctx.reg_alloc.ReadX(carry_arg);
    RegAlloc::Realize(Xresult, Xcarry_out, Xoperand, Xcarry_in);

    const u8 shift = shift_arg.GetImmediateU8();

    if (shift == 0) {
        // No shift: copy operand and carry-in (ADDW with zero acts as a 32-bit move).
        as.ADDW(Xresult, Xoperand, biscuit::zero);
        as.ADDW(Xcarry_out, Xcarry_in, biscuit::zero);
    } else if (shift < 32) {
        // Isolate the last bit shifted out, then perform the shift itself.
        as.SRLIW(Xcarry_out, Xoperand, 32 - shift);
        as.ANDI(Xcarry_out, Xcarry_out, 1);
        as.SLLIW(Xresult, Xoperand, shift);
    } else if (shift > 32) {
        // Everything, including the carry, is shifted out.
        as.MV(Xresult, biscuit::zero);
        as.MV(Xcarry_out, biscuit::zero);
    } else {
        // shift == 32: the last bit shifted out is bit 0 of the operand.
        // The carry must be taken from Xoperand; Xresult is a write-only
        // destination that holds no defined value at this point.
        as.ANDI(Xcarry_out, Xoperand, 1);
        as.MV(Xresult, biscuit::zero);
    }
}
// Placeholder: the 64-bit logical shift left has no RV64 implementation yet
// and only invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::LogicalShiftLeft64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Emits a 32-bit logical shift right by an immediate amount.
//
// Partial implementation: the shift amount (args[1]) must be an immediate and
// no GetCarryFromOp pseudo-operation may be attached to `inst`; both are
// asserted. Shift amounts of 32 or more produce zero.
template<>
void EmitIR<IR::Opcode::LogicalShiftRight32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
    const auto carry_user = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto arg_info = ctx.reg_alloc.GetArgumentInfo(inst);
    auto& value_arg = arg_info[0];
    auto& amount_arg = arg_info[1];

    // TODO: Add full implementation
    ASSERT(carry_user == nullptr);
    ASSERT(amount_arg.IsImmediate());

    const u8 amount = amount_arg.GetImmediateU8();

    auto Xdst = ctx.reg_alloc.WriteX(inst);
    auto Xsrc = ctx.reg_alloc.ReadX(value_arg);
    RegAlloc::Realize(Xdst, Xsrc);

    if (amount > 31) {
        // Every bit is shifted out.
        as.MV(Xdst, biscuit::zero);
        return;
    }

    as.SRLIW(Xdst, Xsrc, amount);
}
// Placeholder emitters for the remaining shift and rotate IR opcodes.
// No RV64 code is generated for any of them yet: every specialization ignores
// its arguments and only invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::LogicalShiftRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ArithmeticShiftRight32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ArithmeticShiftRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::RotateRight32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::RotateRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::RotateRightExtended>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// "Masked" shift/rotate variants.
template<>
void EmitIR<IR::Opcode::LogicalShiftLeftMasked32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::LogicalShiftLeftMasked64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::LogicalShiftRightMasked32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::LogicalShiftRightMasked64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ArithmeticShiftRightMasked32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ArithmeticShiftRightMasked64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::RotateRightMasked32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::RotateRightMasked64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Emits `rd = rs + imm` and builds a flags word in `flags`.
//
// The flags word is assembled bit by bit: bit 31 = N (result negative),
// bit 30 = Z (result zero), bit 29 = C (carry), bit 28 = V (signed overflow).
//
// bitsize - operand width; only 32 and 64 compile, and the C/V computation for
//           64 is UNIMPLEMENTED().
// rd      - destination register for the sum.
// rs      - source register.
// imm     - immediate addend; truncated to 32 bits when bitsize == 32.
// flags   - destination register for the flags word.
//
// Xscratch0 and Xscratch1 are clobbered.
// NOTE(review): the C/V sequences re-read `rs` after `rd` has been written, so
// this presumably requires rd != rs — confirm against the register allocator.
template<size_t bitsize>
static void AddImmWithFlags(biscuit::Assembler& as, biscuit::GPR rd, biscuit::GPR rs, u64 imm, biscuit::GPR flags) {
static_assert(bitsize == 32 || bitsize == 64);
if constexpr (bitsize == 32) {
imm = static_cast<u32>(imm);
}
// Use the immediate form when imm fits a sign-extended 12-bit field,
// otherwise materialise it in Xscratch0 first.
if (mcl::bit::sign_extend<12>(imm) == imm) {
bitsize == 32 ? as.ADDIW(rd, rs, imm) : as.ADDI(rd, rs, imm);
} else {
as.LI(Xscratch0, imm);
bitsize == 32 ? as.ADDW(rd, rs, Xscratch0) : as.ADD(rd, rs, Xscratch0);
}
// Z: set when the result is zero, placed at bit 30.
as.SEQZ(flags, rd);
as.SLLI(flags, flags, 30);
// N: set when the result is negative, placed at bit 31.
as.SLTZ(Xscratch1, rd);
as.SLLI(Xscratch1, Xscratch1, 31);
as.OR(flags, flags, Xscratch1);
if constexpr (bitsize == 32) {
// C: redo the addition at 64-bit width and move bit 32 of the sum down to
// bit 29 (shift right by 3, then mask with 0x20000000).
// NOTE(review): this presumably relies on the upper 32 bits of rs being
// zero — confirm. In the non-immediate case Xscratch0 still holds imm here.
if (mcl::bit::sign_extend<12>(imm) == imm) {
as.ADDI(Xscratch1, rs, imm);
} else {
as.ADD(Xscratch1, rs, Xscratch0);
}
as.SRLI(Xscratch1, Xscratch1, 3);
as.LUI(Xscratch0, 0x20000);
as.AND(Xscratch1, Xscratch1, Xscratch0);
as.OR(flags, flags, Xscratch1);
// V: overflow = ~(imm ^ rs) & (sum ^ rs), i.e. both addends share a sign
// that differs from the sign of the sum. Bit 31 of that value is moved to
// bit 28.
as.LI(Xscratch0, imm);
as.ADD(Xscratch1, rs, Xscratch0);
as.XOR(Xscratch0, Xscratch0, rs);
as.NOT(Xscratch0, Xscratch0);
as.XOR(Xscratch1, Xscratch1, rs);
as.AND(Xscratch1, Xscratch0, Xscratch1);
as.SRLIW(Xscratch1, Xscratch1, 31);
as.SLLI(Xscratch1, Xscratch1, 28);
as.OR(flags, flags, Xscratch1);
} else {
UNIMPLEMENTED();
}
}
// Shared emitter for the Add/Sub IR opcodes.
//
// Template parameters:
//   bitsize - operand width in bits.
//   sub     - false: compute a + b + carry; true: compute a + ~b + carry
//             (so carry == 1 yields a - b and carry == 0 yields a - b - 1).
//
// IR arguments: args[0] = a, args[1] = b, args[2] = carry-in.
//
// Supported combinations (everything else is UNIMPLEMENTED()):
//   * NZCV requested, b immediate, carry-in immediate.
//   * No flags requested, b immediate, carry-in taken from bit 29 of a
//     register holding an NZCV word.
// A GetOverflowFromOp pseudo-operation is not supported.
template<size_t bitsize, bool sub>
static void EmitAddSub(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
    const auto nzcv_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetNZCVFromOp);
    const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    auto Xresult = ctx.reg_alloc.WriteX(inst);
    auto Xa = ctx.reg_alloc.ReadX(args[0]);

    if (overflow_inst) {
        UNIMPLEMENTED();
    } else if (nzcv_inst) {
        if (args[1].IsImmediate()) {
            const u64 imm = args[1].GetImmediateU64();

            if (args[2].IsImmediate()) {
                auto Xflags = ctx.reg_alloc.WriteX(nzcv_inst);
                RegAlloc::Realize(Xresult, Xflags, Xa);

                if (args[2].GetImmediateU1()) {
                    // carry-in == 1: add imm + 1, or for sub ~imm + 1 == -imm.
                    AddImmWithFlags<bitsize>(as, *Xresult, *Xa, sub ? -imm : imm + 1, *Xflags);
                } else {
                    // carry-in == 0: add imm, or for sub ~imm.
                    AddImmWithFlags<bitsize>(as, *Xresult, *Xa, sub ? ~imm : imm, *Xflags);
                }
            } else {
                UNIMPLEMENTED();
            }
        } else {
            UNIMPLEMENTED();
        }
    } else {
        if (args[1].IsImmediate()) {
            const u64 imm = args[1].GetImmediateU64();

            if (args[2].IsImmediate()) {
                UNIMPLEMENTED();
            } else {
                auto Xnzcv = ctx.reg_alloc.ReadX(args[2]);
                RegAlloc::Realize(Xresult, Xa, Xnzcv);

                // Extract the carry (bit 29, mask 0x20000000) as 0 or 1.
                as.LUI(Xscratch0, 0x20000);
                as.AND(Xscratch0, Xnzcv, Xscratch0);
                as.SRLI(Xscratch0, Xscratch0, 29);
                // Second addend: imm for add, ~imm for sub.
                as.LI(Xscratch1, sub ? ~imm : imm);
                as.ADD(Xscratch0, Xscratch0, Xscratch1);
                // Final sum at the requested operand width.
                bitsize == 32 ? as.ADDW(Xresult, Xa, Xscratch0) : as.ADD(Xresult, Xa, Xscratch0);
            }
        } else {
            UNIMPLEMENTED();
        }
    }
}
// 32-bit addition: delegates to EmitAddSub with sub = false.
template<>
void EmitIR<IR::Opcode::Add32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
EmitAddSub<32, false>(as, ctx, inst);
}
// Placeholder: 64-bit addition only invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::Add64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// 32-bit subtraction: delegates to EmitAddSub with sub = true.
template<>
void EmitIR<IR::Opcode::Sub32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
EmitAddSub<32, true>(as, ctx, inst);
}
// Placeholder: 64-bit subtraction only invokes UNIMPLEMENTED().
template<>
void EmitIR<IR::Opcode::Sub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Placeholder emitters for the remaining integer data-processing IR opcodes.
// No RV64 code is generated for any of them yet: every specialization ignores
// its arguments and only invokes UNIMPLEMENTED().

// Multiplication and division.
template<>
void EmitIR<IR::Opcode::Mul32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::Mul64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedMultiplyHigh64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedMultiplyHigh64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedDiv32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedDiv64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedDiv32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedDiv64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Bitwise logic.
template<>
void EmitIR<IR::Opcode::And32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::And64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::AndNot32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::AndNot64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::Eor32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::Eor64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::Or32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::Or64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::Not32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::Not64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Sign extension.
template<>
void EmitIR<IR::Opcode::SignExtendByteToWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignExtendHalfToWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignExtendByteToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignExtendHalfToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignExtendWordToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Zero extension.
template<>
void EmitIR<IR::Opcode::ZeroExtendByteToWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ZeroExtendHalfToWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ZeroExtendByteToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ZeroExtendHalfToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ZeroExtendWordToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Byte reversal.
template<>
void EmitIR<IR::Opcode::ByteReverseWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ByteReverseHalf>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ByteReverseDual>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Bit counting, extraction and replication.
template<>
void EmitIR<IR::Opcode::CountLeadingZeros32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::CountLeadingZeros64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ExtractRegister32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ExtractRegister64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ReplicateBit32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::ReplicateBit64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Minimum / maximum.
template<>
void EmitIR<IR::Opcode::MaxSigned32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MaxSigned64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MaxUnsigned32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MaxUnsigned64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MinSigned32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MinSigned64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MinUnsigned32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::MinUnsigned64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,460 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// Placeholder emitters for the scalar floating-point IR opcodes.
// No RV64 code is generated for any of them yet: every specialization ignores
// its arguments and only invokes UNIMPLEMENTED().

// Basic arithmetic, comparison, min/max.
template<>
void EmitIR<IR::Opcode::FPAbs16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPAbs32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPAbs64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPCompare32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPCompare64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDiv32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDiv64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMax32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMax64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMaxNumeric32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMaxNumeric64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMin32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMin64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMinNumeric32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMinNumeric64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Multiplication family.
template<>
void EmitIR<IR::Opcode::FPMul32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMul64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMulAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMulAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMulAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMulSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMulSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMulSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMulX32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPMulX64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Negation.
template<>
void EmitIR<IR::Opcode::FPNeg16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPNeg32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPNeg64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Reciprocal estimate / exponent / step.
template<>
void EmitIR<IR::Opcode::FPRecipEstimate16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRecipEstimate32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRecipEstimate64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRecipExponent16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRecipExponent32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRecipExponent64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRecipStepFused16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRecipStepFused32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRecipStepFused64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Rounding to integral.
template<>
void EmitIR<IR::Opcode::FPRoundInt16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRoundInt32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRoundInt64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Reciprocal square root estimate / step.
template<>
void EmitIR<IR::Opcode::FPRSqrtEstimate16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRSqrtEstimate32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRSqrtEstimate64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRSqrtStepFused16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRSqrtStepFused32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPRSqrtStepFused64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Square root and subtraction.
template<>
void EmitIR<IR::Opcode::FPSqrt32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSqrt64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Conversions between floating-point precisions.
template<>
void EmitIR<IR::Opcode::FPHalfToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPHalfToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSingleToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSingleToHalf>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDoubleToHalf>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDoubleToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Floating-point to fixed-point conversions.
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedS32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedS64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedU32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPDoubleToFixedU64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPHalfToFixedS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPHalfToFixedS32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPHalfToFixedS64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPHalfToFixedU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPHalfToFixedU32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPHalfToFixedU64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSingleToFixedS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSingleToFixedS32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSingleToFixedS64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSingleToFixedU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSingleToFixedU32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPSingleToFixedU64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Fixed-point to floating-point conversions.
template<>
void EmitIR<IR::Opcode::FPFixedU16ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedS16ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedU16ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedS16ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedU32ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedS32ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedU32ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedS32ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedU64ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedU64ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedS64ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPFixedS64ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,190 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::Backend::RV64 {
// Placeholder emitters for the packed (SIMD-within-a-register) IR opcodes.
// No RV64 code is generated for any of them yet: every specialization ignores
// its arguments and only invokes UNIMPLEMENTED().

// Packed add / subtract.
template<>
void EmitIR<IR::Opcode::PackedAddU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedAddS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSubU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSubS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedAddSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedAddSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSubAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSubAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Packed halving add / subtract.
template<>
void EmitIR<IR::Opcode::PackedHalvingAddU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingAddS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingSubU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingSubS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingAddSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingAddSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingSubAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedHalvingSubAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Packed saturating add / subtract.
template<>
void EmitIR<IR::Opcode::PackedSaturatedAddU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSaturatedAddS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSaturatedSubU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSaturatedSubS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSaturatedAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSaturatedAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSaturatedSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSaturatedSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// Miscellaneous packed operations.
template<>
void EmitIR<IR::Opcode::PackedAbsDiffSumU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::PackedSelect>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,130 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
// RV64 emitters for the scalar saturation opcodes.
// Every specialization in this namespace is a placeholder: the body only
// invokes UNIMPLEMENTED(), so no host code is emitted for these opcodes yet.
namespace Dynarmic::Backend::RV64 {
// --- Saturating add/sub that also produce a flag, and generic saturation ---
template<>
void EmitIR<IR::Opcode::SignedSaturatedAddWithFlag32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedSubWithFlag32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturation>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedSaturation>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Signed saturated add ---
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Signed saturated doubling multiply (high half) ---
template<>
void EmitIR<IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Signed saturated sub ---
template<>
void EmitIR<IR::Opcode::SignedSaturatedSub8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::SignedSaturatedSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Unsigned saturated add ---
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Unsigned saturated sub ---
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedSub8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::UnsignedSaturatedSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,355 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
// RV64 emitters for the floating-point vector opcodes.
// Every specialization in this namespace is a placeholder: the body only
// invokes UNIMPLEMENTED(), so no host code is emitted for these opcodes yet.
// The parameters are left unnamed because the bodies do not use them.
namespace Dynarmic::Backend::RV64 {
// --- Abs / Add / Div ---
template<>
void EmitIR<IR::Opcode::FPVectorAbs16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorAbs32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorAbs64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorDiv32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorDiv64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Equality comparison ---
template<>
void EmitIR<IR::Opcode::FPVectorEqual16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorEqual32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorEqual64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Conversions to floating point ---
template<>
void EmitIR<IR::Opcode::FPVectorFromHalf32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorFromSignedFixed32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorFromSignedFixed64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorFromUnsignedFixed32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorFromUnsignedFixed64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Ordered comparisons ---
template<>
void EmitIR<IR::Opcode::FPVectorGreater32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorGreater64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorGreaterEqual32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorGreaterEqual64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Min / Max ---
template<>
void EmitIR<IR::Opcode::FPVectorMax32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMax64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMaxNumeric32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMaxNumeric64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMin32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMin64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMinNumeric32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMinNumeric64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Multiplication family ---
template<>
void EmitIR<IR::Opcode::FPVectorMul32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMul64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMulAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMulAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMulAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMulX32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorMulX64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Negation ---
template<>
void EmitIR<IR::Opcode::FPVectorNeg16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorNeg32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorNeg64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Paired add ---
template<>
void EmitIR<IR::Opcode::FPVectorPairedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorPairedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorPairedAddLower32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorPairedAddLower64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Reciprocal estimate / step ---
template<>
void EmitIR<IR::Opcode::FPVectorRecipEstimate16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRecipEstimate32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRecipEstimate64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRecipStepFused16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRecipStepFused32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRecipStepFused64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Round to integral ---
template<>
void EmitIR<IR::Opcode::FPVectorRoundInt16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRoundInt32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRoundInt64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Reciprocal square-root estimate / step ---
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Sqrt / Sub ---
template<>
void EmitIR<IR::Opcode::FPVectorSqrt32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorSqrt64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Conversions from floating point ---
template<>
void EmitIR<IR::Opcode::FPVectorToHalf32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorToSignedFixed16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorToSignedFixed32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorToSignedFixed64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,100 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <biscuit/assembler.hpp>
#include <fmt/ostream.h>
#include "dynarmic/backend/riscv64/a32_jitstate.h"
#include "dynarmic/backend/riscv64/abi.h"
#include "dynarmic/backend/riscv64/emit_context.h"
#include "dynarmic/backend/riscv64/emit_riscv64.h"
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
// RV64 emitters for the vector saturating add/sub opcodes.
// Every specialization in this namespace is a placeholder: the body only
// invokes UNIMPLEMENTED(), so no host code is emitted for these opcodes yet.
namespace Dynarmic::Backend::RV64 {
// --- Signed saturated add ---
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Signed saturated sub ---
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedSub8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorSignedSaturatedSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Unsigned saturated add ---
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
// --- Unsigned saturated sub ---
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,352 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/riscv64/reg_alloc.h"
#include <algorithm>
#include <array>
#include "dynarmic/common/assert.h"
#include <mcl/mp/metavalue/lift_value.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/always_false.h"
namespace Dynarmic::Backend::RV64 {
// Byte offset of the `spill` array inside StackLayout. The spill loads/stores
// below add it to `index * spill_slot_size` and address the result off `sp`.
constexpr size_t spill_offset = offsetof(StackLayout, spill);
// Size in bytes of one spill slot, i.e. of one element of StackLayout::spill.
constexpr size_t spill_slot_size = sizeof(decltype(StackLayout::spill)::value_type);
// Returns true only for IR::Type::Table; every other type yields false.
// GetArgumentInfo() uses this to skip location tracking for such arguments.
static bool IsValuelessType(IR::Type type) {
    return type == IR::Type::Table;
}
// Type of the IR value wrapped by this argument.
IR::Type Argument::GetType() const {
    return value.GetType();
}

// True when the wrapped IR value is an immediate.
bool Argument::IsImmediate() const {
    return value.IsImmediate();
}

// Returns the wrapped value as a U1 immediate (delegates to IR::Value::GetU1).
bool Argument::GetImmediateU1() const {
    return value.GetU1();
}

// Returns the immediate narrowed to 8 bits; asserts that it fits.
u8 Argument::GetImmediateU8() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x100);
    return static_cast<u8>(imm);
}

// Returns the immediate narrowed to 16 bits; asserts that it fits.
u16 Argument::GetImmediateU16() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x10000);
    return static_cast<u16>(imm);
}

// Returns the immediate narrowed to 32 bits; asserts that it fits.
u32 Argument::GetImmediateU32() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x100000000);
    return static_cast<u32>(imm);
}

// Returns the immediate widened to 64 bits; no range check is needed.
u64 Argument::GetImmediateU64() const {
    return value.GetImmediateAsU64();
}

// Returns the immediate as a condition code; asserts the argument is an
// immediate of type Cond.
IR::Cond Argument::GetImmediateCond() const {
    ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
    return value.GetCond();
}

// Returns the immediate as an access type; asserts the argument is an
// immediate of type AccType.
IR::AccType Argument::GetImmediateAccType() const {
    ASSERT(IsImmediate() && GetType() == IR::Type::AccType);
    return value.GetAccType();
}
// True when `value` is one of the IR instructions held in this location.
bool HostLocInfo::Contains(const IR::Inst* value) const {
    return std::any_of(values.begin(), values.end(), [value](const IR::Inst* held) { return held == value; });
}

// Marks an entirely unused location as realized so it can serve as a scratch
// register. Asserts that the location really is unused.
void HostLocInfo::SetupScratchLocation() {
    ASSERT(IsCompletelyEmpty());
    realized = true;
}

// True when the location holds no values and every counter/flag is zero.
bool HostLocInfo::IsCompletelyEmpty() const {
    const bool has_state = locked || realized || accumulated_uses || expected_uses || uses_this_inst;
    return values.empty() && !has_state;
}

// Folds the uses recorded for the current instruction into the running total
// and releases the location once every expected use has been seen.
void HostLocInfo::UpdateUses() {
    accumulated_uses += uses_this_inst;
    uses_this_inst = 0;

    if (accumulated_uses != expected_uses) {
        return;
    }

    values.clear();
    accumulated_uses = 0;
    expected_uses = 0;
}
// Builds the Argument wrappers for `inst` and records one pending use on the
// host location of every argument that is neither an immediate nor valueless.
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
    ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};

    for (size_t i = 0; i < inst->NumArgs(); i++) {
        const IR::Value arg = inst->GetArg(i);
        ret[i].value = arg;

        // Immediates and valueless types have no host location to track.
        if (arg.IsImmediate() || IsValuelessType(arg.GetType())) {
            continue;
        }

        ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
        ValueInfo(arg.GetInst()).uses_this_inst++;
    }

    return ret;
}
// True when `inst` currently resides in some GPR, FPR or spill slot.
bool RegAlloc::IsValueLive(IR::Inst* inst) const {
    return ValueLocation(inst).has_value();
}
void RegAlloc::UpdateAllUses() {
for (auto& gpr : gprs) {
gpr.UpdateUses();
}
for (auto& fpr : fprs) {
fpr.UpdateUses();
}
for (auto& spill : spills) {
spill.UpdateUses();
}
}
// Defines `inst` as an alias of `arg` without emitting any code.
// An immediate argument replaces all uses of `inst`; otherwise `inst` joins
// the host location already holding the argument and adds its use count there.
void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
    ASSERT(!ValueLocation(inst));

    if (!arg.value.IsImmediate()) {
        HostLocInfo& holder = ValueInfo(arg.value.GetInst());
        holder.values.emplace_back(inst);
        holder.expected_uses += inst->UseCount();
        return;
    }

    inst->ReplaceUsesWith(arg.value);
}
void RegAlloc::AssertNoMoreUses() const {
const auto is_empty = [](const auto& i) { return i.IsCompletelyEmpty(); };
ASSERT(std::all_of(gprs.begin(), gprs.end(), is_empty));
ASSERT(std::all_of(fprs.begin(), fprs.end(), is_empty));
ASSERT(std::all_of(spills.begin(), spills.end(), is_empty));
}
// Materialises the immediate `value` in a freshly allocated scratch register
// of the requested kind and returns that register's index.
// Only the GPR path is implemented; the FPR path invokes UNIMPLEMENTED().
template<HostLoc::Kind kind>
u32 RegAlloc::GenerateImmediate(const IR::Value& value) {
    // TODO
    // ASSERT(value.GetType() != IR::Type::U1);

    if constexpr (kind == HostLoc::Kind::Gpr) {
        const u32 scratch_index = AllocateRegister(gprs, gpr_order);
        // Evict whatever currently occupies the chosen register.
        SpillGpr(scratch_index);
        gprs[scratch_index].SetupScratchLocation();

        as.LI(biscuit::GPR{scratch_index}, value.GetImmediateAsU64());

        return scratch_index;
    } else if constexpr (kind == HostLoc::Kind::Fpr) {
        UNIMPLEMENTED();
    } else {
        static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
    }

    return 0;
}
// Ensures `value` is available in a register of kind `required_kind` and
// returns that register's index.
//  - Immediates are materialised through GenerateImmediate().
//  - A value already living in a register of the required kind is marked
//    realized and returned as-is.
//  - Otherwise a register is allocated (spilling its occupant if needed), code
//    is emitted to move/load the value into it, and the bookkeeping record is
//    transferred from the old location to the new one.
template<HostLoc::Kind required_kind>
u32 RegAlloc::RealizeReadImpl(const IR::Value& value) {
if (value.IsImmediate()) {
return GenerateImmediate<required_kind>(value);
}
const auto current_location = ValueLocation(value.GetInst());
ASSERT(current_location);
// Fast path: already in a register of the right kind.
if (current_location->kind == required_kind) {
ValueInfo(*current_location).realized = true;
return current_location->index;
}
ASSERT(!ValueInfo(*current_location).realized);
// NOTE(review): RAReg's constructor increments `locked` for non-immediate
// reads before Realize() runs, so this assertion looks like it would fire
// whenever this path is reached through an RAReg - confirm the intent.
ASSERT(!ValueInfo(*current_location).locked);
if constexpr (required_kind == HostLoc::Kind::Gpr) {
const u32 new_location_index = AllocateRegister(gprs, gpr_order);
SpillGpr(new_location_index);
switch (current_location->kind) {
case HostLoc::Kind::Gpr:
// Handled by the fast path above; reaching here is a bug.
ASSERT_FALSE("Logic error");
break;
case HostLoc::Kind::Fpr:
// Move the FPR's 64-bit contents into the GPR.
as.FMV_X_D(biscuit::GPR(new_location_index), biscuit::FPR{current_location->index});
// ASSERT size fits
break;
case HostLoc::Kind::Spill:
// Reload from the spill slot, addressed relative to sp.
as.LD(biscuit::GPR{new_location_index}, spill_offset + current_location->index * spill_slot_size, biscuit::sp);
break;
}
// Transfer the bookkeeping record and leave the old location empty.
gprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
gprs[new_location_index].realized = true;
return new_location_index;
} else if constexpr (required_kind == HostLoc::Kind::Fpr) {
const u32 new_location_index = AllocateRegister(fprs, fpr_order);
SpillFpr(new_location_index);
switch (current_location->kind) {
case HostLoc::Kind::Gpr:
// Move the GPR's 64-bit contents into the FPR.
as.FMV_D_X(biscuit::FPR{new_location_index}, biscuit::GPR(current_location->index));
break;
case HostLoc::Kind::Fpr:
// Handled by the fast path above; reaching here is a bug.
ASSERT_FALSE("Logic error");
break;
case HostLoc::Kind::Spill:
// Reload from the spill slot, addressed relative to sp.
as.FLD(biscuit::FPR{new_location_index}, spill_offset + current_location->index * spill_slot_size, biscuit::sp);
break;
}
// Transfer the bookkeeping record and leave the old location empty.
fprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
fprs[new_location_index].realized = true;
return new_location_index;
} else {
static_assert(Common::always_false_v<mcl::mp::lift_value<required_kind>>);
}
}
// Allocates a register of kind `required_kind` to receive the result of
// `value` and returns its index. The value must not already have a location.
// The chosen register is reset, then marked locked and realized, and its
// expected use count is taken from the instruction.
template<HostLoc::Kind required_kind>
u32 RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
    ASSERT(!ValueLocation(value));

    const auto claim_for_value = [value](HostLocInfo& slot) {
        slot = {};
        slot.values.emplace_back(value);
        slot.locked = true;
        slot.realized = true;
        slot.expected_uses = value->UseCount();
    };

    if constexpr (required_kind == HostLoc::Kind::Gpr) {
        const u32 gpr_index = AllocateRegister(gprs, gpr_order);
        SpillGpr(gpr_index);
        claim_for_value(gprs[gpr_index]);
        return gpr_index;
    } else if constexpr (required_kind == HostLoc::Kind::Fpr) {
        const u32 fpr_index = AllocateRegister(fprs, fpr_order);
        SpillFpr(fpr_index);
        claim_for_value(fprs[fpr_index]);
        return fpr_index;
    } else {
        static_assert(Common::always_false_v<mcl::mp::lift_value<required_kind>>);
    }
}
// Explicit instantiations for the two register kinds. The templates are defined
// in this translation unit, while RAReg<T>::Realize() calls them from the header.
template u32 RegAlloc::RealizeReadImpl<HostLoc::Kind::Gpr>(const IR::Value& value);
template u32 RegAlloc::RealizeReadImpl<HostLoc::Kind::Fpr>(const IR::Value& value);
template u32 RegAlloc::RealizeWriteImpl<HostLoc::Kind::Gpr>(const IR::Inst* value);
template u32 RegAlloc::RealizeWriteImpl<HostLoc::Kind::Fpr>(const IR::Inst* value);
// Picks a register index from `order` to hand out next.
// The first register in `order` that holds no values and is not locked wins.
// If there is none, a random unlocked register is chosen as the eviction
// victim; the caller is expected to spill it.
// Asserts that at least one unlocked register exists. Without that check an
// empty candidate list would make `size() - 1` wrap around and index an empty
// vector.
u32 RegAlloc::AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<u32>& order) const {
    const auto empty = std::find_if(order.begin(), order.end(), [&](u32 i) { return regs[i].values.empty() && !regs[i].locked; });
    if (empty != order.end()) {
        return *empty;
    }

    std::vector<u32> candidates;
    std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](u32 i) { return !regs[i].locked; });
    ASSERT_MSG(!candidates.empty(), "All allocatable registers are locked");

    // TODO: LRU
    std::uniform_int_distribution<size_t> dis{0, candidates.size() - 1};
    return candidates[dis(rand_gen)];
}
// Evicts the contents of GPR `index` to a free spill slot, if it holds any
// values. The register must be neither locked nor realized.
void RegAlloc::SpillGpr(u32 index) {
    ASSERT(!gprs[index].locked && !gprs[index].realized);

    if (!gprs[index].values.empty()) {
        const u32 slot = FindFreeSpill();
        as.SD(biscuit::GPR{index}, spill_offset + slot * spill_slot_size, biscuit::sp);
        // Move the bookkeeping record to the slot and leave the register empty.
        spills[slot] = std::exchange(gprs[index], {});
    }
}
// Evicts the contents of FPR `index` to a free spill slot, if it holds any
// values. The register must be neither locked nor realized.
void RegAlloc::SpillFpr(u32 index) {
    ASSERT(!fprs[index].locked && !fprs[index].realized);

    if (!fprs[index].values.empty()) {
        const u32 slot = FindFreeSpill();
        as.FSD(biscuit::FPR{index}, spill_offset + slot * spill_slot_size, biscuit::sp);
        // Move the bookkeeping record to the slot and leave the register empty.
        spills[slot] = std::exchange(fprs[index], {});
    }
}
// Returns the index of the first spill slot that holds no values.
// Asserts if every slot is occupied.
u32 RegAlloc::FindFreeSpill() const {
    const auto holds_nothing = [](const HostLocInfo& info) {
        return info.values.empty();
    };
    const auto iter = std::find_if(spills.begin(), spills.end(), holds_nothing);
    ASSERT_MSG(iter != spills.end(), "All spill locations are full");
    return static_cast<u32>(iter - spills.begin());
}
// Looks up where `value` currently lives. GPRs are searched first, then FPRs,
// then spill slots; std::nullopt means the value has no location.
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
    const auto search = [value](const auto& bank, HostLoc::Kind kind) -> std::optional<HostLoc> {
        const auto hit = std::find_if(bank.begin(), bank.end(), [value](const HostLocInfo& info) {
            return info.Contains(value);
        });
        if (hit == bank.end()) {
            return std::nullopt;
        }
        return HostLoc{kind, static_cast<u32>(hit - bank.begin())};
    };

    if (const auto in_gpr = search(gprs, HostLoc::Kind::Gpr)) {
        return in_gpr;
    }
    if (const auto in_fpr = search(fprs, HostLoc::Kind::Fpr)) {
        return in_fpr;
    }
    return search(spills, HostLoc::Kind::Spill);
}
// Returns the bookkeeping record for the given host location.
// Asserts if `host_loc.kind` is not one of the known kinds.
HostLocInfo& RegAlloc::ValueInfo(HostLoc host_loc) {
    const size_t slot = static_cast<size_t>(host_loc.index);

    switch (host_loc.kind) {
    case HostLoc::Kind::Gpr:
        return gprs[slot];
    case HostLoc::Kind::Fpr:
        return fprs[slot];
    case HostLoc::Kind::Spill:
        return spills[slot];
    }

    ASSERT_FALSE("RegAlloc::ValueInfo: Invalid HostLoc::Kind");
}
// Returns the bookkeeping record of the location that currently holds `value`.
// GPRs are searched first, then FPRs, then spill slots. Asserts if the value
// is not found anywhere.
//
// Each search result is compared against the end iterator of the container
// that was actually searched. Comparing the FPR and spill results against
// `gprs.end()` would treat a miss as a hit and dereference a past-the-end
// element.
HostLocInfo& RegAlloc::ValueInfo(const IR::Inst* value) {
    const auto contains_value = [value](const HostLocInfo& info) {
        return info.Contains(value);
    };

    if (const auto iter = std::find_if(gprs.begin(), gprs.end(), contains_value); iter != gprs.end()) {
        return *iter;
    }
    if (const auto iter = std::find_if(fprs.begin(), fprs.end(), contains_value); iter != fprs.end()) {
        return *iter;
    }
    if (const auto iter = std::find_if(spills.begin(), spills.end(), contains_value); iter != spills.end()) {
        return *iter;
    }

    ASSERT_FALSE("RegAlloc::ValueInfo: Value not found");
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,191 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include <optional>
#include <random>
#include <utility>
#include <vector>
#include <biscuit/assembler.hpp>
#include <biscuit/registers.hpp>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include <mcl/type_traits/is_instance_of_template.hpp>
#include <ankerl/unordered_dense.h>
#include "dynarmic/backend/riscv64/stack_layout.h"
#include "dynarmic/ir/cond.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Backend::RV64 {
class RegAlloc;
// Identifies one host storage location: a general-purpose register, a
// floating-point register, or a spill slot, together with its index.
struct HostLoc {
    enum class Kind {
        Gpr,
        Fpr,
        Spill,
    };

    Kind kind;
    u32 index;
};
// One operand of an IR instruction as seen by the register allocator.
// Instances are created only by RegAlloc (the constructor is private).
struct Argument {
public:
    using copyable_reference = std::reference_wrapper<Argument>;

    IR::Type GetType() const;
    bool IsImmediate() const;

    // Immediate accessors. The narrowing variants assert that the value fits;
    // the Cond/AccType variants assert the argument is an immediate of that type.
    bool GetImmediateU1() const;
    u8 GetImmediateU8() const;
    u16 GetImmediateU16() const;
    u32 GetImmediateU32() const;
    u64 GetImmediateU64() const;
    IR::Cond GetImmediateCond() const;
    IR::AccType GetImmediateAccType() const;

private:
    friend class RegAlloc;

    explicit Argument(RegAlloc& reg_alloc)
            : reg_alloc{reg_alloc} {}

    bool allocated = false;
    RegAlloc& reg_alloc;
    IR::Value value;
};
// Handle to a register of biscuit type `T` obtained from RegAlloc.
// The register is only assigned once Realize() has run; the conversion and
// dereference operators read the assigned register.
template<typename T>
struct RAReg {
public:
    // FPR-derived types map to the Fpr kind; everything else is treated as Gpr.
    static constexpr HostLoc::Kind kind = std::is_base_of_v<biscuit::FPR, T>
                                            ? HostLoc::Kind::Fpr
                                            : HostLoc::Kind::Gpr;

    operator T() const { return *reg; }

    T operator*() const { return *reg; }

    const T* operator->() const { return &*reg; }

    ~RAReg();

private:
    friend class RegAlloc;

    explicit RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value);

    void Realize();

    RegAlloc& reg_alloc;
    bool write;
    const IR::Value value;
    std::optional<T> reg;
};
// Bookkeeping record for one host location (register or spill slot).
struct HostLocInfo final {
    // IR instructions whose results currently live in this location.
    std::vector<const IR::Inst*> values;
    // Lock count; a non-zero value keeps the location from being reallocated.
    size_t locked = 0;
    // Set while the location is realized for the instruction being emitted.
    bool realized = false;
    // Uses recorded for the current instruction, not yet folded into the total.
    size_t uses_this_inst = 0;
    // Uses seen so far, compared against `expected_uses` in UpdateUses().
    size_t accumulated_uses = 0;
    size_t expected_uses = 0;

    bool Contains(const IR::Inst*) const;
    void SetupScratchLocation();
    bool IsCompletelyEmpty() const;
    void UpdateUses();
};
class RegAlloc {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
explicit RegAlloc(biscuit::Assembler& as, std::vector<u32> gpr_order, std::vector<u32> fpr_order)
: as{as}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {}
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
bool IsValueLive(IR::Inst* inst) const;
auto ReadX(Argument& arg) { return RAReg<biscuit::GPR>{*this, false, arg.value}; }
auto ReadD(Argument& arg) { return RAReg<biscuit::FPR>{*this, false, arg.value}; }
auto WriteX(IR::Inst* inst) { return RAReg<biscuit::GPR>{*this, true, IR::Value{inst}}; }
auto WriteD(IR::Inst* inst) { return RAReg<biscuit::FPR>{*this, true, IR::Value{inst}}; }
void DefineAsExisting(IR::Inst* inst, Argument& arg);
void SpillAll();
template<typename... Ts>
static void Realize(Ts&... rs) {
static_assert((mcl::is_instance_of_template<RAReg, Ts>() && ...));
(rs.Realize(), ...);
}
void UpdateAllUses();
void AssertNoMoreUses() const;
private:
template<typename>
friend struct RAReg;
template<HostLoc::Kind kind>
u32 GenerateImmediate(const IR::Value& value);
template<HostLoc::Kind kind>
u32 RealizeReadImpl(const IR::Value& value);
template<HostLoc::Kind kind>
u32 RealizeWriteImpl(const IR::Inst* value);
u32 AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<u32>& order) const;
void SpillGpr(u32 index);
void SpillFpr(u32 index);
u32 FindFreeSpill() const;
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
HostLocInfo& ValueInfo(HostLoc host_loc);
HostLocInfo& ValueInfo(const IR::Inst* value);
biscuit::Assembler& as;
std::vector<u32> gpr_order;
std::vector<u32> fpr_order;
std::array<HostLocInfo, 32> gprs;
std::array<HostLocInfo, 32> fprs;
std::array<HostLocInfo, SpillCount> spills;
mutable std::mt19937 rand_gen;
};
// Creates an unrealized handle. For a read of a non-immediate value the
// location currently holding that value is locked right away, so it cannot be
// chosen for reallocation before Realize() runs.
template<typename T>
RAReg<T>::RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value)
        : reg_alloc{reg_alloc}, write{write}, value{value} {
    const bool locks_existing_value = !write && !value.IsImmediate();
    if (locks_existing_value) {
        reg_alloc.ValueInfo(value.GetInst()).locked++;
    }
}
// Releases the handle: drops one lock on the location holding a non-immediate
// value and, if a register was assigned, clears its `realized` flag.
// The unlock is not conditional on `write`: for writes the matching lock is
// set by RegAlloc::RealizeWriteImpl() rather than by the constructor.
template<typename T>
RAReg<T>::~RAReg() {
    if (!value.IsImmediate()) {
        reg_alloc.ValueInfo(value.GetInst()).locked--;
    }
    if (reg.has_value()) {
        reg_alloc.ValueInfo(HostLoc{kind, reg->Index()}).realized = false;
    }
}
// Assigns the actual register: write handles allocate a destination for the
// instruction's result, read handles make the value available in a register.
template<typename T>
void RAReg<T>::Realize() {
    if (write) {
        reg = T{reg_alloc.RealizeWriteImpl<kind>(value.GetInst())};
    } else {
        reg = T{reg_alloc.RealizeReadImpl<kind>(value)};
    }
}
} // namespace Dynarmic::Backend::RV64

View file

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2024 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include "dynarmic/common/common_types.h"
namespace Dynarmic::Backend::RV64 {

// Number of 64-bit spill slots available in StackLayout::spill.
constexpr size_t SpillCount = 64;

// Stack-resident state for the RV64 backend. The struct is 16-byte aligned and
// its size is checked to be a multiple of 16 below.
struct alignas(16) StackLayout {
    s64 cycles_remaining;
    s64 cycles_to_run;

    // Spill slots; the register allocator addresses them relative to sp.
    std::array<u64, SpillCount> spill;

    // NOTE(review): presumably saved host floating-point control/status
    // state - the names suggest so, but no use is visible here; confirm.
    u32 save_host_fpcr;
    u32 save_host_fpsr;

    bool check_bit;
};

static_assert(sizeof(StackLayout) % 16 == 0);

} // namespace Dynarmic::Backend::RV64

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,146 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include <optional>
#include <set>
#include <tuple>
#include <ankerl/unordered_dense.h>
#include "dynarmic/backend/block_range_information.h"
#include "dynarmic/backend/x64/a32_jitstate.h"
#include "dynarmic/backend/x64/emit_x64.h"
#include "dynarmic/backend/x64/reg_alloc.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/interface/A32/a32.h"
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/ir/terminal.h"
namespace Dynarmic::Backend::X64 {
class RegAlloc;
/// Emission context specialised for the A32 frontend.
///
/// Extends EmitContext with A32-typed location accessors and holds a reference
/// to the A32 user configuration, which must outlive the context.
struct A32EmitContext final : EmitContext {
    A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);

    A32::LocationDescriptor Location() const;
    A32::LocationDescriptor EndLocation() const;
    bool IsSingleStep() const;

    FP::FPCR FPCR(bool fpcr_controlled = true) const override;

    /// Forwards the query to the user configuration.
    bool HasOptimization(OptimizationFlag flag) const override {
        return conf.HasOptimization(flag);
    }

    /// Non-owning reference to the configuration passed at construction.
    const A32::UserConfig& conf;
};
/**
 * x64 code emitter for the A32 frontend.
 *
 * Declares one `EmitA32<name>` member per A32 opcode (generated from opcodes.inc)
 * and overrides the terminal-emission and patching hooks of EmitX64.
 */
class A32EmitX64 final : public EmitX64 {
public:
    A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_interface);
    ~A32EmitX64() override;

    /**
     * Emit host machine code for a basic block with intermediate representation `block`.
     * @note block is modified.
     */
    BlockDescriptor Emit(IR::Block& block);

    void ClearCache() override;

    void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);

protected:
    void EmitCondPrelude(const A32EmitContext& ctx);

    /// One slot of the fast dispatch table; exactly 16 bytes (see static_assert).
    struct FastDispatchEntry {
        // All-ones default; presumably marks an empty slot -- confirm in the .cpp.
        u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
        const void* code_ptr = nullptr;
    };
    static_assert(sizeof(FastDispatchEntry) == 0x10);

    // Equals (fast_dispatch_table_size - 1) * sizeof(FastDispatchEntry), i.e. it
    // selects a 16-byte-aligned byte offset inside the table.
    static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
    // Number of entries in fast_dispatch_table.
    static constexpr size_t fast_dispatch_table_size = 0x10000;

    void ClearFastDispatchTable();
    void GenFastmemFallbacks();
    void GenTerminalHandlers();

    // Microinstruction emitters
    // Only A32OPC entries of opcodes.inc expand to declarations; generic and A64
    // opcodes expand to nothing here.
#define OPCODE(...)
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
#define A64OPC(...)
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC

    // Helpers
    std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;

    // Fastmem information
    using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, unsigned>;
    struct FastmemPatchInfo {
        u64 resume_rip;
        u64 callback;
        DoNotFastmemMarker marker;
        bool recompile;
    };
    std::optional<DoNotFastmemMarker> ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const;
    FakeCall FastmemCallback(u64 rip);

    // Memory access helpers
    void EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end = nullptr);
    template<std::size_t bitsize, auto callback>
    void EmitMemoryRead(A32EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void EmitMemoryWrite(A32EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void EmitExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void EmitExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void EmitExclusiveReadMemoryInline(A32EmitContext& ctx, IR::Inst* inst);
    template<std::size_t bitsize, auto callback>
    void EmitExclusiveWriteMemoryInline(A32EmitContext& ctx, IR::Inst* inst);

    // Terminal instruction emitters
    void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
    void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
    void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;

    // Patching
    void Unpatch(const IR::LocationDescriptor& target_desc) override;
    void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
    void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;

    // Data members. Declaration order is initialization order; do not reorder.
    const A32::UserConfig conf;
    RegAlloc reg_alloc;  // Reusable register allocator
    BlockRangeInformation<u32> block_ranges;
    std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
    ankerl::unordered_dense::map<u64, FastmemPatchInfo> fastmem_patch_info;
    std::set<DoNotFastmemMarker> do_not_fastmem;
    std::map<std::tuple<bool, size_t, int, int>, void (*)()> read_fallbacks;
    std::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
    std::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
    void (*memory_read_128)() = nullptr;   // Dummy
    void (*memory_write_128)() = nullptr;  // Dummy
    // Default-initialized like the sibling handler pointers below so it is never
    // indeterminate before it is assigned.
    const void* terminal_handler_pop_rsb_hint = nullptr;
    const void* terminal_handler_fast_dispatch_hint = nullptr;
    FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
    A32::Jit* jit_interface;
};
} // namespace Dynarmic::Backend::X64

Some files were not shown because too many files have changed in this diff Show more