mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-20 16:58:55 +02:00
[cmake] refactor: Use CPM over submodules (#143)
Transfers the majority of submodules and large externals to CPM, using source archives rather than full Git clones. Not only does this save massive amounts of clone and configure time, but dependencies are grabbed on-demand rather than being required by default. Additionally, CPM will (generally) automatically search for system dependencies, though certain dependencies have options to control this. Testing shows gains ranging from 5x to 10x in terms of overall clone/configure time. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/143 Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
parent
04e5e64538
commit
51b170b470
4035 changed files with 709 additions and 1033458 deletions
|
|
@ -6,6 +6,13 @@
|
|||
# Enable modules to include each other's files
|
||||
include_directories(.)
|
||||
|
||||
# Dynarmic
|
||||
if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64))
|
||||
set(DYNARMIC_IGNORE_ASSERTS ON)
|
||||
add_subdirectory(dynarmic)
|
||||
add_library(dynarmic::dynarmic ALIAS dynarmic)
|
||||
endif()
|
||||
|
||||
# CMake seems to only define _DEBUG on Windows
|
||||
set_property(DIRECTORY APPEND PROPERTY
|
||||
COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
|
||||
|
|
|
|||
|
|
@ -226,7 +226,9 @@ else()
|
|||
)
|
||||
endif()
|
||||
|
||||
target_link_libraries(audio_core PUBLIC common core Opus::opus)
|
||||
target_include_directories(audio_core PRIVATE ${OPUS_INCLUDE_DIRS})
|
||||
target_link_libraries(audio_core PUBLIC common core opus)
|
||||
|
||||
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
|
||||
target_link_libraries(audio_core PRIVATE dynarmic::dynarmic)
|
||||
endif()
|
||||
|
|
@ -237,7 +239,7 @@ if (ENABLE_CUBEB)
|
|||
sink/cubeb_sink.h
|
||||
)
|
||||
|
||||
target_link_libraries(audio_core PRIVATE cubeb::cubeb)
|
||||
target_link_libraries(audio_core PRIVATE cubeb)
|
||||
target_compile_definitions(audio_core PRIVATE -DHAVE_CUBEB=1)
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -1184,6 +1184,7 @@ else()
|
|||
)
|
||||
endif()
|
||||
|
||||
target_include_directories(core PRIVATE ${OPUS_INCLUDE_DIRS})
|
||||
target_link_libraries(core PUBLIC common PRIVATE audio_core hid_core network video_core nx_tzdb tz)
|
||||
target_link_libraries(core PUBLIC Boost::headers PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls RenderDoc::API)
|
||||
if (MINGW)
|
||||
|
|
|
|||
214
src/dynarmic/.clang-format
Normal file
214
src/dynarmic/.clang-format
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
---
|
||||
Language: Cpp
|
||||
AccessModifierOffset: -4
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignConsecutiveMacros: None
|
||||
AlignConsecutiveAssignments: None
|
||||
AlignConsecutiveBitFields: None
|
||||
AlignConsecutiveDeclarations: None
|
||||
AlignEscapedNewlines: Right
|
||||
AlignOperands: AlignAfterOperator
|
||||
AlignTrailingComments: true
|
||||
AllowAllArgumentsOnNextLine: true
|
||||
AllowAllConstructorInitializersOnNextLine: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortEnumsOnASingleLine: true
|
||||
AllowShortBlocksOnASingleLine: Empty
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: Inline
|
||||
AllowShortLambdasOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
AlwaysBreakTemplateDeclarations: Yes
|
||||
AttributeMacros:
|
||||
- __capability
|
||||
BinPackArguments: true
|
||||
BinPackParameters: false
|
||||
BitFieldColonSpacing: Both
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: Never
|
||||
AfterEnum: false
|
||||
AfterFunction: false
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
AfterExternBlock: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: false
|
||||
SplitEmptyRecord: false
|
||||
SplitEmptyNamespace: false
|
||||
BreakBeforeBinaryOperators: All
|
||||
BreakBeforeBraces: Custom
|
||||
BreakBeforeConceptDeclarations: true
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakBeforeInheritanceComma: false
|
||||
BreakConstructorInitializersBeforeComma: true
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakInheritanceList: BeforeComma
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakStringLiterals: true
|
||||
ColumnLimit: 0
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ConstructorInitializerIndentWidth: 8
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: true
|
||||
DeriveLineEnding: true
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
# EmptyLineAfterAccessModifier: Leave
|
||||
EmptyLineBeforeAccessModifier: Always
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
FixNamespaceComments: true
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- Q_FOREACH
|
||||
- BOOST_FOREACH
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^<mach/'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<windows.h>'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<([^\.])*>$'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<.*\.'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '.*'
|
||||
Priority: 4
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
# IndentAccessModifiers: false
|
||||
IndentCaseBlocks: false
|
||||
IndentCaseLabels: false
|
||||
IndentExternBlock: NoIndent
|
||||
IndentGotoLabels: false
|
||||
IndentPPDirectives: AfterHash
|
||||
IndentRequires: false
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
# InsertTrailingCommas: None
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: None
|
||||
NamespaceMacros:
|
||||
ObjCBinPackProtocolList: Never
|
||||
ObjCBlockIndentWidth: 2
|
||||
ObjCBreakBeforeNestedBlockParam: true
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PenaltyBreakAssignment: 2
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
PenaltyIndentedWhitespace: 0
|
||||
PointerAlignment: Left
|
||||
RawStringFormats:
|
||||
- Language: Cpp
|
||||
Delimiters:
|
||||
- cc
|
||||
- CC
|
||||
- cpp
|
||||
- Cpp
|
||||
- CPP
|
||||
- 'c++'
|
||||
- 'C++'
|
||||
CanonicalDelimiter: ''
|
||||
BasedOnStyle: google
|
||||
- Language: TextProto
|
||||
Delimiters:
|
||||
- pb
|
||||
- PB
|
||||
- proto
|
||||
- PROTO
|
||||
EnclosingFunctions:
|
||||
- EqualsProto
|
||||
- EquivToProto
|
||||
- PARSE_PARTIAL_TEXT_PROTO
|
||||
- PARSE_TEST_PROTO
|
||||
- PARSE_TEXT_PROTO
|
||||
- ParseTextOrDie
|
||||
- ParseTextProtoOrDie
|
||||
- ParseTestProto
|
||||
- ParsePartialTestProto
|
||||
CanonicalDelimiter: ''
|
||||
BasedOnStyle: google
|
||||
ReflowComments: true
|
||||
# ShortNamespaceLines: 5
|
||||
SortIncludes: true
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: true
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: false
|
||||
SpaceAroundPointerQualifiers: Default
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCaseColon: false
|
||||
SpaceBeforeCpp11BracedList: false
|
||||
SpaceBeforeCtorInitializerColon: true
|
||||
SpaceBeforeInheritanceColon: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceBeforeRangeBasedForLoopColon: true
|
||||
SpaceBeforeSquareBrackets: false
|
||||
SpaceInEmptyBlock: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: false
|
||||
SpacesInConditionalStatement: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInContainerLiterals: false
|
||||
# SpacesInLineCommentPrefix: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
Standard: Latest
|
||||
StatementAttributeLikeMacros:
|
||||
- Q_EMIT
|
||||
StatementMacros:
|
||||
- Q_UNUSED
|
||||
- QT_REQUIRE_VERSION
|
||||
TabWidth: 4
|
||||
TypenameMacros:
|
||||
UseCRLF: false
|
||||
UseTab: Never
|
||||
WhitespaceSensitiveMacros:
|
||||
- STRINGIZE
|
||||
- PP_STRINGIZE
|
||||
- BOOST_PP_STRINGIZE
|
||||
- NS_SWIFT_NAME
|
||||
- CF_SWIFT_NAME
|
||||
- FCODE
|
||||
- ICODE
|
||||
...
|
||||
13
src/dynarmic/.gitignore
vendored
Normal file
13
src/dynarmic/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Built files
|
||||
build/
|
||||
build-*/
|
||||
cmake-build-*/
|
||||
.idea/
|
||||
docs/Doxygen/
|
||||
# Generated files
|
||||
src/dynarmic/backend/arm64/mig/
|
||||
src/dynarmic/backend/x64/mig/
|
||||
# System files
|
||||
.DS_Store
|
||||
.vscode
|
||||
.cache/
|
||||
209
src/dynarmic/CMakeLists.txt
Normal file
209
src/dynarmic/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
cmake_minimum_required(VERSION 3.12)
|
||||
project(dynarmic LANGUAGES C CXX ASM VERSION 6.7.0)
|
||||
|
||||
# Determine if we're built as a subproject (using add_subdirectory)
|
||||
# or if this is the master project.
|
||||
set(MASTER_PROJECT OFF)
|
||||
if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
|
||||
set(MASTER_PROJECT ON)
|
||||
endif()
|
||||
|
||||
if (MASTER_PROJECT)
|
||||
include(CTest)
|
||||
endif()
|
||||
|
||||
# Dynarmic project options
|
||||
option(DYNARMIC_ENABLE_CPU_FEATURE_DETECTION "Turning this off causes dynarmic to assume the host CPU doesn't support anything later than SSE3" ON)
|
||||
option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" OFF)
|
||||
option(DYNARMIC_FATAL_ERRORS "Errors are fatal" OFF)
|
||||
option(DYNARMIC_IGNORE_ASSERTS "Ignore asserts" OFF)
|
||||
option(DYNARMIC_TESTS_USE_UNICORN "Enable fuzzing tests against unicorn" OFF)
|
||||
option(DYNARMIC_USE_LLVM "Support disassembly of jitted x86_64 code using LLVM" OFF)
|
||||
option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON)
|
||||
option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF)
|
||||
option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT})
|
||||
if (NOT DEFINED DYNARMIC_FRONTENDS)
|
||||
set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable")
|
||||
endif()
|
||||
|
||||
# Default to a Release build
|
||||
# Only single-config generators (Makefiles, Ninja) use CMAKE_BUILD_TYPE to pick
# the configuration. Multi-config generators (Visual Studio, Xcode,
# Ninja Multi-Config) choose it at build time, so the cache entry is left
# untouched for them.
get_property(dynarmic_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if (NOT dynarmic_is_multi_config AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
    message(STATUS "Defaulting to a Release build")
endif()
unset(dynarmic_is_multi_config)
|
||||
|
||||
# Set hard requirements for C++
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
# Disable in-source builds
|
||||
# set(CMAKE_DISABLE_SOURCE_CHANGES ON)
|
||||
# set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
|
||||
if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
|
||||
message(SEND_ERROR "In-source builds are not allowed.")
|
||||
endif()
|
||||
|
||||
# Add the module directory to the list of paths
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
|
||||
|
||||
# Arch detection
|
||||
include(DetectArchitecture)
|
||||
if (NOT DEFINED ARCHITECTURE)
|
||||
message(FATAL_ERROR "Unsupported architecture encountered. Ending CMake generation.")
|
||||
endif()
|
||||
message(STATUS "Target architecture: ${ARCHITECTURE}")
|
||||
|
||||
# Compiler flags
|
||||
if (MSVC)
|
||||
set(DYNARMIC_CXX_FLAGS
|
||||
/experimental:external
|
||||
/external:W0
|
||||
/external:anglebrackets
|
||||
/W4
|
||||
/w44263 # Non-virtual member function hides base class virtual function
|
||||
/w44265 # Class has virtual functions, but destructor is not virtual
|
||||
/w44456 # Declaration of 'var' hides previous local declaration
|
||||
/w44457 # Declaration of 'var' hides function parameter
|
||||
/w44458 # Declaration of 'var' hides class member
|
||||
/w44459 # Declaration of 'var' hides global definition
|
||||
/w44946 # Reinterpret-cast between related types
|
||||
/wd4592 # Symbol will be dynamically initialized (implementation limitation)
|
||||
/permissive- # Stricter C++ standards conformance
|
||||
/MP
|
||||
/Zi
|
||||
/Zo
|
||||
/EHsc
|
||||
/Zc:externConstexpr # Allows external linkage for variables declared "extern constexpr", as the standard permits.
|
||||
/Zc:inline # Omits inline functions from object-file output.
|
||||
/Zc:throwingNew # Assumes new (without std::nothrow) never returns null.
|
||||
/volatile:iso # Use strict standard-abiding volatile semantics
|
||||
/bigobj # Increase number of sections in .obj files
|
||||
/DNOMINMAX)
|
||||
|
||||
if (DYNARMIC_WARNINGS_AS_ERRORS)
|
||||
list(APPEND DYNARMIC_CXX_FLAGS
|
||||
/WX)
|
||||
endif()
|
||||
|
||||
# Extra flags when the MSVC-style front end is actually Clang.
# The variable name is passed bare so that if() dereferences it exactly once;
# writing ${CMAKE_CXX_COMPILER_ID} would expand to e.g. MSVC, which if() would
# then dereference again because MSVC is itself a defined variable.
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    list(APPEND DYNARMIC_CXX_FLAGS
         -Qunused-arguments
         -Wno-missing-braces)
endif()
|
||||
else()
|
||||
set(DYNARMIC_CXX_FLAGS
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wcast-qual
|
||||
-pedantic
|
||||
-Wno-missing-braces)
|
||||
|
||||
if (ARCHITECTURE STREQUAL "x86_64")
|
||||
list(APPEND DYNARMIC_CXX_FLAGS -mtune=core2)
|
||||
endif()
|
||||
|
||||
if (DYNARMIC_WARNINGS_AS_ERRORS)
|
||||
list(APPEND DYNARMIC_CXX_FLAGS
|
||||
-pedantic-errors
|
||||
-Werror)
|
||||
endif()
|
||||
|
||||
if (DYNARMIC_FATAL_ERRORS)
|
||||
list(APPEND DYNARMIC_CXX_FLAGS
|
||||
-Wfatal-errors)
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
|
||||
# GCC produces bogus -Warray-bounds warnings from xbyak headers for code paths that are not
|
||||
# actually reachable. Specifically, it happens in cases where some code casts an Operand&
|
||||
# to Address& after first checking isMEM(), and that code is inlined in a situation where
|
||||
# GCC knows that the variable is actually a Reg64. isMEM() will never return true for a
|
||||
# Reg64, but GCC doesn't know that.
|
||||
list(APPEND DYNARMIC_CXX_FLAGS -Wno-array-bounds)
|
||||
list(APPEND DYNARMIC_CXX_FLAGS -Wstack-usage=4096)
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang")
|
||||
# Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6.
|
||||
# And this in turn limits the size of a std::array.
|
||||
list(APPEND DYNARMIC_CXX_FLAGS -fbracket-depth=1024)
|
||||
# Clang mistakenly blames CMake for using unused arguments during compilation
|
||||
list(APPEND DYNARMIC_CXX_FLAGS -Wno-unused-command-line-argument)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Forced use of individual bundled libraries for non-REQUIRED library is possible with e.g. cmake -DCMAKE_DISABLE_FIND_PACKAGE_fmt=ON ...
|
||||
|
||||
if (DYNARMIC_USE_BUNDLED_EXTERNALS)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_biscuit ON)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_Catch2 ON)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_fmt ON)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_mcl ON)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_oaknut ON)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_unordered_dense ON)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_xbyak ON)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_Zydis ON)
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_Zycore ON)
|
||||
endif()
|
||||
|
||||
set(CPM_USE_LOCAL_PACKAGES ON)
|
||||
|
||||
find_package(Boost 1.57 REQUIRED)
|
||||
find_package(fmt 9 CONFIG)
|
||||
|
||||
if ("arm64" IN_LIST ARCHITECTURE OR DYNARMIC_TESTS)
|
||||
find_package(oaknut 2.0.1 CONFIG)
|
||||
endif()
|
||||
|
||||
if ("x86_64" IN_LIST ARCHITECTURE)
|
||||
find_package(xbyak 7 CONFIG)
|
||||
endif()
|
||||
|
||||
if (DYNARMIC_USE_LLVM)
|
||||
find_package(LLVM REQUIRED)
|
||||
separate_arguments(LLVM_DEFINITIONS)
|
||||
endif()
|
||||
|
||||
if (DYNARMIC_TESTS)
|
||||
find_package(Catch2 3 CONFIG)
|
||||
if (DYNARMIC_TESTS_USE_UNICORN)
|
||||
find_package(Unicorn REQUIRED)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Pull in externals CMakeLists for libs where available
|
||||
add_subdirectory(externals)
|
||||
|
||||
# Dynarmic project files
|
||||
add_subdirectory(src/dynarmic)
|
||||
if (DYNARMIC_TESTS)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
||||
#
|
||||
# Install
|
||||
#
|
||||
include(GNUInstallDirs)
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
install(TARGETS dynarmic EXPORT dynarmicTargets)
|
||||
install(EXPORT dynarmicTargets
|
||||
NAMESPACE dynarmic::
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/dynarmic"
|
||||
)
|
||||
|
||||
configure_package_config_file(CMakeModules/dynarmicConfig.cmake.in
|
||||
dynarmicConfig.cmake
|
||||
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/dynarmic"
|
||||
)
|
||||
write_basic_package_version_file(dynarmicConfigVersion.cmake
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/dynarmicConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/dynarmicConfigVersion.cmake"
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/dynarmic"
|
||||
)
|
||||
|
||||
# Install the public headers (*.h) from src/dynarmic.
# DESTINATION is spelled out instead of using `TYPE INCLUDE`, because the TYPE
# keyword requires CMake 3.14 while this project declares 3.12 as its minimum.
# CMAKE_INSTALL_INCLUDEDIR is provided by the GNUInstallDirs module included above.
install(DIRECTORY src/dynarmic
        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
        FILES_MATCHING PATTERN "*.h")
|
||||
17
src/dynarmic/CMakeModules/CreateDirectoryGroups.cmake
Normal file
17
src/dynarmic/CMakeModules/CreateDirectoryGroups.cmake
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# This function should be passed a name of an existing target. It will automatically generate
|
||||
# file groups following the directory hierarchy, so that the layout of the files in IDEs matches the
|
||||
# one in the filesystem.
|
||||
function(create_target_directory_groups target_name)
    # Catch-all group: anything not explicitly grouped below lands in
    # "Other Files" so that it does not clutter the IDE tree.
    source_group("Other Files" REGULAR_EXPRESSION ".")

    get_target_property(source_list "${target_name}" SOURCES)

    # get_target_property() yields "<var>-NOTFOUND" when the property is not
    # set; that sentinel (and an empty list) evaluates to false, so bail out
    # instead of treating it as a file name.
    if (NOT source_list)
        return()
    endif()

    foreach(source_path IN LISTS source_list)
        get_filename_component(parent_dir "${source_path}" DIRECTORY)
        # Group names use '\' as a separator even though the rest of CMake uses '/'.
        string(REPLACE "/" "\\" group_name "${parent_dir}")
        source_group("${group_name}" FILES "${source_path}")
    endforeach()
endfunction()
|
||||
62
src/dynarmic/CMakeModules/DetectArchitecture.cmake
Normal file
62
src/dynarmic/CMakeModules/DetectArchitecture.cmake
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
include(CheckSymbolExists)
|
||||
|
||||
if (CMAKE_OSX_ARCHITECTURES)
|
||||
set(DYNARMIC_MULTIARCH_BUILD 1)
|
||||
set(ARCHITECTURE "${CMAKE_OSX_ARCHITECTURES}")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Probes for the symbol `symbol` and, if it exists, sets ARCHITECTURE to `arch`
# in the caller's scope. Does nothing once ARCHITECTURE is already defined, so
# the first matching probe wins.
function(detect_architecture symbol arch)
    if (DEFINED ARCHITECTURE)
        return()
    endif()

    # Silence the "Looking for ..." output of the check for this one call.
    set(CMAKE_REQUIRED_QUIET YES)
    check_symbol_exists("${symbol}" "" DETECT_ARCHITECTURE_${arch})
    unset(CMAKE_REQUIRED_QUIET)

    if (DETECT_ARCHITECTURE_${arch})
        set(ARCHITECTURE "${arch}" PARENT_SCOPE)
    endif()

    # Drop the cached result so a later probe that reuses the same `arch`
    # name starts without a cached answer.
    unset(DETECT_ARCHITECTURE_${arch} CACHE)
endfunction()
|
||||
|
||||
detect_architecture("__ARM64__" arm64)
|
||||
detect_architecture("__aarch64__" arm64)
|
||||
detect_architecture("_M_ARM64" arm64)
|
||||
|
||||
detect_architecture("__arm__" arm)
|
||||
detect_architecture("__TARGET_ARCH_ARM" arm)
|
||||
detect_architecture("_M_ARM" arm)
|
||||
|
||||
detect_architecture("__x86_64" x86_64)
|
||||
detect_architecture("__x86_64__" x86_64)
|
||||
detect_architecture("__amd64" x86_64)
|
||||
detect_architecture("_M_X64" x86_64)
|
||||
|
||||
detect_architecture("__i386" x86)
|
||||
detect_architecture("__i386__" x86)
|
||||
detect_architecture("_M_IX86" x86)
|
||||
|
||||
detect_architecture("__ia64" ia64)
|
||||
detect_architecture("__ia64__" ia64)
|
||||
detect_architecture("_M_IA64" ia64)
|
||||
|
||||
detect_architecture("__mips" mips)
|
||||
detect_architecture("__mips__" mips)
|
||||
detect_architecture("_M_MRX000" mips)
|
||||
|
||||
detect_architecture("__ppc64__" ppc64)
|
||||
detect_architecture("__powerpc64__" ppc64)
|
||||
|
||||
detect_architecture("__ppc__" ppc)
|
||||
detect_architecture("__ppc" ppc)
|
||||
detect_architecture("__powerpc__" ppc)
|
||||
detect_architecture("_ARCH_COM" ppc)
|
||||
detect_architecture("_ARCH_PWR" ppc)
|
||||
detect_architecture("_ARCH_PPC" ppc)
|
||||
detect_architecture("_M_MPPC" ppc)
|
||||
detect_architecture("_M_PPC" ppc)
|
||||
|
||||
detect_architecture("__riscv" riscv)
|
||||
|
||||
detect_architecture("__EMSCRIPTEN__" wasm)
|
||||
37
src/dynarmic/CMakeModules/FindUnicorn.cmake
Normal file
37
src/dynarmic/CMakeModules/FindUnicorn.cmake
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Exports:
|
||||
#
|
||||
# Variables:
|
||||
# Unicorn_FOUND
|
||||
# LIBUNICORN_INCLUDE_DIR
|
||||
# LIBUNICORN_LIBRARY
|
||||
#
|
||||
# Target:
|
||||
# Unicorn::Unicorn
|
||||
#
|
||||
|
||||
find_path(LIBUNICORN_INCLUDE_DIR
|
||||
unicorn/unicorn.h
|
||||
HINTS $ENV{UNICORNDIR}
|
||||
PATH_SUFFIXES include)
|
||||
|
||||
find_library(LIBUNICORN_LIBRARY
|
||||
NAMES unicorn
|
||||
HINTS $ENV{UNICORNDIR})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(Unicorn DEFAULT_MSG LIBUNICORN_LIBRARY LIBUNICORN_INCLUDE_DIR)
|
||||
|
||||
# Define the Unicorn::Unicorn imported target once the library and headers
# have been located. Unicorn_FOUND is the result variable that
# find_package_handle_standard_args() sets for the package name "Unicorn".
# The TARGET guard keeps a second find_package(Unicorn) in the same build
# from failing on a duplicate add_library().
if (Unicorn_FOUND AND NOT TARGET Unicorn::Unicorn)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    find_package(Threads REQUIRED)
    unset(THREADS_PREFER_PTHREAD_FLAG)

    add_library(Unicorn::Unicorn UNKNOWN IMPORTED)
    # Paths are quoted so that locations containing spaces or semicolons
    # stay a single property value.
    set_target_properties(Unicorn::Unicorn PROPERTIES
        IMPORTED_LOCATION "${LIBUNICORN_LIBRARY}"
        INTERFACE_INCLUDE_DIRECTORIES "${LIBUNICORN_INCLUDE_DIR}"
        INTERFACE_LINK_LIBRARIES Threads::Threads
    )
endif()
|
||||
|
||||
mark_as_advanced(LIBUNICORN_INCLUDE_DIR LIBUNICORN_LIBRARY)
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
# Adds the source files in ARGN to target `project` as sources belonging to
# architecture `arch`.
#
# When DYNARMIC_MULTIARCH_BUILD is not set, the files are added to the target
# unchanged. Otherwise every file ending in ".cpp" is run through the wrapper
# script at build time, and the generated file under
# <current binary dir>/arch_gen/ is added to the target instead.
#
# NOTE(review): in the multi-arch path, files that do not end in ".cpp" are not
# added to the target at all - presumably intentional; confirm.
# NOTE(review): CMAKE_CURRENT_FUNCTION_LIST_DIR was introduced in CMake 3.17,
# which is newer than the 3.12 minimum declared by the top-level project.
function(target_architecture_specific_sources project arch)
    if (NOT DYNARMIC_MULTIARCH_BUILD)
        target_sources("${project}" PRIVATE ${ARGN})
        return()
    endif()

    foreach(input_file IN LISTS ARGN)
        # The dot is escaped so that only a literal ".cpp" suffix matches.
        if (input_file MATCHES "\\.cpp$")
            if (NOT IS_ABSOLUTE "${input_file}")
                set(input_file "${CMAKE_CURRENT_SOURCE_DIR}/${input_file}")
            endif()

            set(output_file "${CMAKE_CURRENT_BINARY_DIR}/arch_gen/${input_file}")
            add_custom_command(
                OUTPUT "${output_file}"
                COMMAND ${CMAKE_COMMAND} "-Darch=${arch}"
                                         "-Dinput_file=${input_file}"
                                         "-Doutput_file=${output_file}"
                                         -P "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/impl/TargetArchitectureSpecificSourcesWrapFile.cmake"
                DEPENDS "${input_file}"
                VERBATIM
            )
            target_sources(${project} PRIVATE "${output_file}")
        endif()
    endforeach()
endfunction()
|
||||
33
src/dynarmic/CMakeModules/dynarmicConfig.cmake.in
Normal file
33
src/dynarmic/CMakeModules/dynarmicConfig.cmake.in
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
@PACKAGE_INIT@
|
||||
|
||||
include(CMakeFindDependencyMacro)
|
||||
|
||||
set(ARCHITECTURE "@ARCHITECTURE@")
|
||||
|
||||
# Locate dynarmic's own dependencies unless it was built with
# BUILD_SHARED_LIBS enabled.
# The substituted value is quoted: if BUILD_SHARED_LIBS was never set at
# configure time the placeholder expands to nothing, and an unquoted
# `if (NOT )` would not evaluate as "not shared". A quoted empty string is a
# false constant, so `NOT ""` is true as intended.
if (NOT "@BUILD_SHARED_LIBS@")
    find_dependency(Boost 1.57)
    find_dependency(fmt 9)
    find_dependency(mcl 0.1.12 EXACT)
    find_dependency(unordered_dense)

    if ("arm64" IN_LIST ARCHITECTURE)
        find_dependency(oaknut 2.0.1)
    endif()

    if ("riscv" IN_LIST ARCHITECTURE)
        find_dependency(biscuit 0.9.1)
    endif()

    if ("x86_64" IN_LIST ARCHITECTURE)
        find_dependency(xbyak 7)
        find_dependency(Zydis 4)
    endif()

    if (@DYNARMIC_USE_LLVM@)
        find_dependency(LLVM)
    endif()
endif()
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
|
||||
|
||||
check_required_components(@PROJECT_NAME@)
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Expects `arch`, `input_file` and `output_file` to be defined by the caller.
# Writes `output_file` as the contents of `input_file` enclosed in an
# `#if defined(MCL_ARCHITECTURE_<ARCH>)` guard, where <ARCH> is `arch` in
# upper case.
string(TOUPPER "${arch}" arch_upper)
file(READ "${input_file}" source_text)
string(CONCAT wrapped_text
    "#include <mcl/macro/architecture.hpp>\n"
    "#if defined(MCL_ARCHITECTURE_${arch_upper})\n"
    "${source_text}\n"
    "#endif\n")
file(WRITE "${output_file}" "${wrapped_text}")
|
||||
12
src/dynarmic/LICENSE.txt
Normal file
12
src/dynarmic/LICENSE.txt
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
Copyright (C) 2017 merryhime <git@mary.rs>
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for
|
||||
any purpose with or without fee is hereby granted.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
||||
AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
420
src/dynarmic/README.md
Normal file
420
src/dynarmic/README.md
Normal file
|
|
@ -0,0 +1,420 @@
|
|||
Dynarmic
|
||||
========
|
||||
|
||||
[x86-64 CI](https://github.com/yuzu-mirror/dynarmic/actions/workflows/x86-64.yml) [AArch64 CI](https://github.com/yuzu-mirror/dynarmic/actions/workflows/AArch64.yml)
|
||||
|
||||
A dynamic recompiler for ARM.
|
||||
|
||||
Highlight features:
|
||||
|
||||
- Fast dynamic binary translation via Just-in-Time compilation
|
||||
- Clean API
|
||||
- Implemented in modern C++20
|
||||
- Hooks exposed for easy code instrumentation
|
||||
- Code injection support for very fine-grained instrumentation
|
||||
- Support for unusual address space setups (bring-your-own memory system)
|
||||
- Native support for most popular operating systems (Windows, macOS, Linux, FreeBSD, OpenBSD, NetBSD, Android)
|
||||
|
||||
*Please note that an adversarial guest program [can determine if it is being run under dynarmic](#disadvantages-of-dynarmic). Preventing this is not a goal of this project.*
|
||||
|
||||
### Supported guest architectures
|
||||
|
||||
* v3
|
||||
* v4
|
||||
* v4T
|
||||
* v5TE
|
||||
* v6K
|
||||
* v6T2
|
||||
* v7A
|
||||
* 32-bit v8
|
||||
* 64-bit v8
|
||||
|
||||
You can specify the specific guest version using [ArchVersion](src/dynarmic/interface/A32/arch_version.h).
|
||||
|
||||
There are no plans to support v1 or v2.
|
||||
|
||||
### Supported host architectures
|
||||
|
||||
* x86-64
|
||||
* AArch64
|
||||
|
||||
There are no plans to support any 32-bit architecture.
|
||||
|
||||
Important API Changes in v6.x Series
|
||||
------------------------------------
|
||||
|
||||
* **v6.7.0**
|
||||
* To support use cases where the guest should have the same address space as the host, `nullptr` is now a valid value for `fastmem_pointer`.
|
||||
**This change is not backwards-compatible.** If you were previously using `nullptr` to represent an invalid fastmem arena, you will now have to use `std::nullopt`.
|
||||
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
||||
Design documentation can be found at [docs/Design.md](docs/Design.md).
|
||||
|
||||
|
||||
Usage Example
|
||||
-------------
|
||||
|
||||
The below is a minimal example. Bring-your-own memory system.
|
||||
|
||||
```cpp
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <exception>
|
||||
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
|
||||
using u8 = std::uint8_t;
|
||||
using u16 = std::uint16_t;
|
||||
using u32 = std::uint32_t;
|
||||
using u64 = std::uint64_t;
|
||||
|
||||
class MyEnvironment final : public Dynarmic::A32::UserCallbacks {
|
||||
public:
|
||||
u64 ticks_left = 0;
|
||||
std::array<u8, 2048> memory{};
|
||||
|
||||
u8 MemoryRead8(u32 vaddr) override {
|
||||
if (vaddr >= memory.size()) {
|
||||
return 0;
|
||||
}
|
||||
return memory[vaddr];
|
||||
}
|
||||
|
||||
u16 MemoryRead16(u32 vaddr) override {
|
||||
return u16(MemoryRead8(vaddr)) | u16(MemoryRead8(vaddr + 1)) << 8;
|
||||
}
|
||||
|
||||
u32 MemoryRead32(u32 vaddr) override {
|
||||
return u32(MemoryRead16(vaddr)) | u32(MemoryRead16(vaddr + 2)) << 16;
|
||||
}
|
||||
|
||||
u64 MemoryRead64(u32 vaddr) override {
|
||||
return u64(MemoryRead32(vaddr)) | u64(MemoryRead32(vaddr + 4)) << 32;
|
||||
}
|
||||
|
||||
void MemoryWrite8(u32 vaddr, u8 value) override {
|
||||
if (vaddr >= memory.size()) {
|
||||
return;
|
||||
}
|
||||
memory[vaddr] = value;
|
||||
}
|
||||
|
||||
void MemoryWrite16(u32 vaddr, u16 value) override {
|
||||
MemoryWrite8(vaddr, u8(value));
|
||||
MemoryWrite8(vaddr + 1, u8(value >> 8));
|
||||
}
|
||||
|
||||
void MemoryWrite32(u32 vaddr, u32 value) override {
|
||||
MemoryWrite16(vaddr, u16(value));
|
||||
MemoryWrite16(vaddr + 2, u16(value >> 16));
|
||||
}
|
||||
|
||||
void MemoryWrite64(u32 vaddr, u64 value) override {
|
||||
MemoryWrite32(vaddr, u32(value));
|
||||
MemoryWrite32(vaddr + 4, u32(value >> 32));
|
||||
}
|
||||
|
||||
void InterpreterFallback(u32 pc, size_t num_instructions) override {
|
||||
// This is never called in practice.
|
||||
std::terminate();
|
||||
}
|
||||
|
||||
void CallSVC(u32 swi) override {
|
||||
// Do something.
|
||||
}
|
||||
|
||||
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
|
||||
// Do something.
|
||||
}
|
||||
|
||||
void AddTicks(u64 ticks) override {
|
||||
if (ticks > ticks_left) {
|
||||
ticks_left = 0;
|
||||
return;
|
||||
}
|
||||
ticks_left -= ticks;
|
||||
}
|
||||
|
||||
u64 GetTicksRemaining() override {
|
||||
return ticks_left;
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
MyEnvironment env;
|
||||
Dynarmic::A32::UserConfig user_config;
|
||||
user_config.callbacks = &env;
|
||||
Dynarmic::A32::Jit cpu{user_config};
|
||||
|
||||
// Execute at least 1 instruction.
|
||||
// (Note: More than one instruction may be executed.)
|
||||
env.ticks_left = 1;
|
||||
|
||||
// Write some code to memory.
|
||||
env.MemoryWrite16(0, 0x0088); // lsls r0, r1, #2
|
||||
env.MemoryWrite16(2, 0xE7FE); // b +#0 (infinite loop)
|
||||
|
||||
// Setup registers.
|
||||
cpu.Regs()[0] = 1;
|
||||
cpu.Regs()[1] = 2;
|
||||
cpu.Regs()[15] = 0; // PC = 0
|
||||
cpu.SetCpsr(0x00000030); // Thumb mode
|
||||
|
||||
// Execute!
|
||||
cpu.Run();
|
||||
|
||||
// Here we would expect cpu.Regs()[0] == 8
|
||||
printf("R0: %u\n", cpu.Regs()[0]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
Alternatives to Dynarmic
|
||||
------------------------
|
||||
|
||||
Here are some projects with the same goals as dynarmic:
|
||||
|
||||
* [Unicorn](https://www.unicorn-engine.org/) - Recompiling multi-architecture CPU emulator, based on QEMU
|
||||
* [SkyEye](http://skyeye.sourceforge.net) - Cached interpreter for ARM
|
||||
|
||||
More general alternatives:
|
||||
|
||||
* [tARMac](https://davidsharp.com/tarmac/) - Tarmac's use of armlets was initial inspiration for us to use an intermediate representation
|
||||
* [QEMU](https://www.qemu.org/) - Recompiling multi-architecture system emulator
|
||||
* [VisUAL](https://salmanarif.bitbucket.io/visual/index.html) - Visual ARM UAL emulator intended for education
|
||||
* A wide variety of other recompilers, interpreters and emulators can be found embedded in other projects, here are some we would recommend looking at:
|
||||
* [firebird's recompiler](https://github.com/nspire-emus/firebird) - Takes more of a call-threaded approach to recompilation
|
||||
* [higan's arm7tdmi emulator](https://github.com/higan-emu/higan/tree/master/higan/component/processor/arm7tdmi) - Very clean code-style
|
||||
* [arm-js by ozaki-r](https://github.com/ozaki-r/arm-js) - Emulates ARMv7A and some peripherals of Versatile Express, in the browser
|
||||
|
||||
Disadvantages of Dynarmic
|
||||
-------------------------
|
||||
|
||||
In the pursuit of speed, some behavior not commonly depended upon is elided. Therefore this emulator does not match spec.
|
||||
Please note that this would mean that a guest application can easily determine if it is being run under instrumentation.
|
||||
|
||||
Known examples:
|
||||
|
||||
* Only user-mode is emulated, there is no emulation of any other privilege levels.
|
||||
* FPSR state is approximate.
|
||||
* Misaligned loads/stores are not appropriately trapped in certain cases.
|
||||
* Exclusive monitor behavior may not match any known physical processor.
|
||||
|
||||
No formal verification has been done, and no security assessment has been made.
|
||||
Use this code base at your own risk.
|
||||
|
||||
Legal
|
||||
-----
|
||||
|
||||
dynarmic is under a 0BSD license. See LICENSE.txt for more details.
|
||||
|
||||
dynarmic uses several other libraries, whose licenses are included below:
|
||||
|
||||
### biscuit
|
||||
|
||||
```
|
||||
Copyright 2021 Lioncash/Lioncache
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
IN THE SOFTWARE.
|
||||
```
|
||||
|
||||
### catch
|
||||
|
||||
```
|
||||
Boost Software License - Version 1.0 - August 17th, 2003
|
||||
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
```
|
||||
|
||||
### fmt
|
||||
|
||||
```
|
||||
Copyright (c) 2012 - 2016, Victor Zverovich
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
```
|
||||
|
||||
### mcl & oaknut
|
||||
|
||||
```
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
```
|
||||
|
||||
### unordered_dense
|
||||
|
||||
```
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Martin Leitner-Ankerl
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
```
|
||||
|
||||
### xbyak
|
||||
|
||||
```
|
||||
Copyright (c) 2007 MITSUNARI Shigeo
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
Neither the name of the copyright owner nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
|
||||
す場合に限り、再頒布および使用が許可されます。
|
||||
|
||||
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
|
||||
を含めること。
|
||||
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
|
||||
権表示、本条件一覧、および下記免責条項を含めること。
|
||||
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
|
||||
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
|
||||
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
|
||||
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
|
||||
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
|
||||
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
|
||||
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
|
||||
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
|
||||
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
|
||||
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
|
||||
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
|
||||
一切責任を負わないものとします。
|
||||
```
|
||||
|
||||
### zydis
|
||||
|
||||
```
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014-2020 Florian Bernd
|
||||
Copyright (c) 2014-2020 Joel Höner
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
```
|
||||
324
src/dynarmic/docs/Design.md
Normal file
324
src/dynarmic/docs/Design.md
Normal file
|
|
@ -0,0 +1,324 @@
|
|||
# Dynarmic Design Documentation
|
||||
|
||||
Dynarmic is a dynamic recompiler for the ARMv6K architecture. Future plans for dynarmic include
|
||||
support for other versions of the ARM architecture, having an interpreter mode, and adding support
|
||||
for other architectures.
|
||||
|
||||
Users of this library interact with it primarily through the interface provided in
|
||||
[`src/dynarmic/interface`](../src/dynarmic/interface). Users specify how dynarmic's CPU core interacts with
|
||||
the rest of their system by providing an implementation of the relevant `UserCallbacks` interface.
|
||||
Users set up the CPU state using member functions of `Jit`, then call `Jit::Run` to start CPU
|
||||
execution. The callbacks defined on `UserCallbacks` may be called from dynamically generated code,
|
||||
so users of the library should not depend on the stack being in a walkable state for unwinding.
|
||||
|
||||
* A32: [`Jit`](../src/dynarmic/interface/A32/a32.h), [`UserCallbacks`](../src/dynarmic/interface/A32/config.h)
|
||||
* A64: [`Jit`](../src/dynarmic/interface/A64/a64.h), [`UserCallbacks`](../src/dynarmic/interface/A64/config.h)
|
||||
|
||||
Dynarmic reads instructions from memory by calling `UserCallbacks::MemoryReadCode`. These
|
||||
instructions then pass through several stages:
|
||||
|
||||
1. Decoding (Identifying what type of instruction it is and breaking it up into fields)
|
||||
2. Translation (Generation of high-level IR from the instruction)
|
||||
3. Optimization (Elimination of redundant microinstructions, other speed improvements)
|
||||
4. Emission (Generation of host-executable code into memory)
|
||||
5. Execution (Host CPU jumps to the start of emitted code and runs it)
|
||||
|
||||
Using the A32 frontend with the x64 backend as an example:
|
||||
|
||||
* Decoding is done by [double dispatch](https://en.wikipedia.org/wiki/Visitor_pattern) in
|
||||
[`src/dynarmic/frontend/A32/decoder/{arm.h,thumb16.h,thumb32.h}`](../src/dynarmic/frontend/A32/decoder/).
|
||||
* Translation is done by the visitors in [`src/dynarmic/frontend/A32/translate/translate_{arm,thumb}.cpp`](../src/dynarmic/frontend/A32/translate/).
|
||||
The function [`Translate`](../src/dynarmic/frontend/A32/translate/translate.h) takes a starting memory location,
|
||||
some CPU state, and memory reader callback and returns a basic block of IR.
|
||||
* The IR can be found under [`src/dynarmic/ir/`](../src/dynarmic/ir/).
|
||||
* Optimizations can be found under [`src/dynarmic/ir/opt/`](../src/dynarmic/ir/opt/).
|
||||
* Emission is done by `EmitX64` which can be found in [`src/dynarmic/backend/x64/emit_x64.{h,cpp}`](../src/dynarmic/backend/x64/).
|
||||
* Execution is performed by calling `BlockOfCode::RunCode` in [`src/dynarmic/backend/x64/block_of_code.{h,cpp}`](../src/dynarmic/backend/x64/).
|
||||
|
||||
## Decoder
|
||||
|
||||
The decoder is a double dispatch decoder. Each instruction is represented by a line in the relevant
|
||||
instruction table. Here is an example line from [`arm.h`](../src/dynarmic/frontend/A32/decoder/arm.h):
|
||||
|
||||
INST(&V::arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv")
|
||||
|
||||
(Details on this instruction can be found in section A8.8.1 of the ARMv7-A manual. This is encoding A1.)
|
||||
|
||||
The first argument to INST is the member function to call on the visitor. The second argument is a user-readable
|
||||
instruction name. The third argument is a bit-representation of the instruction.
|
||||
|
||||
### Instruction Bit-Representation
|
||||
|
||||
Each character in the bitstring represents a bit. A `0` means that that bitposition **must** contain a zero. A `1`
|
||||
means that that bitposition **must** contain a one. A `-` means we don't care about the value at that bitposition.
|
||||
A string of the same character represents a field. In the above example, the first four bits `cccc` represent the
|
||||
four-bit-long cond field of the ARM Add with Carry (immediate) instruction.
|
||||
|
||||
The visitor would have to have a function named `arm_ADC_imm` with 6 arguments, one for each field (`cccc`, `S`,
|
||||
`nnnn`, `dddd`, `rrrr`, `vvvvvvvv`). If there is a mismatch of field number with argument number, a compile-time
|
||||
error results.
|
||||
|
||||
## Translator
|
||||
|
||||
The translator is a visitor that uses the decoder to decode instructions. The translator generates IR code with the
|
||||
help of the [`IREmitter` class](../src/dynarmic/ir/ir_emitter.h). An example of a translation function follows:
|
||||
|
||||
bool ArmTranslatorVisitor::arm_ADC_imm(Cond cond, bool S, Reg n, Reg d, int rotate, Imm8 imm8) {
|
||||
u32 imm32 = ArmExpandImm(rotate, imm8);
|
||||
|
||||
// ADC{S}<c> <Rd>, <Rn>, #<imm>
|
||||
|
||||
if (ConditionPassed(cond)) {
|
||||
auto result = ir.AddWithCarry(ir.GetRegister(n), ir.Imm32(imm32), ir.GetCFlag());
|
||||
|
||||
if (d == Reg::PC) {
|
||||
ASSERT(!S);
|
||||
ir.ALUWritePC(result.result);
|
||||
ir.SetTerm(IR::Term::ReturnToDispatch{});
|
||||
return false;
|
||||
}
|
||||
|
||||
ir.SetRegister(d, result.result);
|
||||
if (S) {
|
||||
ir.SetNFlag(ir.MostSignificantBit(result.result));
|
||||
ir.SetZFlag(ir.IsZero(result.result));
|
||||
ir.SetCFlag(result.carry);
|
||||
ir.SetVFlag(result.overflow);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
where `ir` is an instance of the `IREmitter` class. Each member function of the `IREmitter` class constructs
|
||||
an IR microinstruction.
|
||||
|
||||
## Intermediate Representation
|
||||
|
||||
Dynarmic uses an ordered SSA intermediate representation. It is very vaguely similar to those found in other
|
||||
similar projects like redream, nucleus, and xenia. Major differences are: (1) the abundance of context
|
||||
microinstructions whereas those projects generally only have two (`load_context`/`store_context`), (2) the
|
||||
explicit handling of flags as their own values, and (3) very different basic block edge handling.
|
||||
|
||||
The intention of the context microinstructions and explicit flag handling is to allow for future optimizations. The
|
||||
differences in the way edges are handled are a quirk of the current implementation and dynarmic will likely add a
|
||||
function analyser in the medium-term future.
|
||||
|
||||
Dynarmic's intermediate representation is typed. Each microinstruction may take zero or more arguments and may
|
||||
return zero or more arguments. A subset of the microinstructions available is documented below.
|
||||
|
||||
A complete list of microinstructions can be found in [src/dynarmic/ir/opcodes.inc](../src/dynarmic/ir/opcodes.inc).
|
||||
|
||||
The below lists some commonly used microinstructions.
|
||||
|
||||
### Immediate: Imm{U1,U8,U32,RegRef}
|
||||
|
||||
<u1> ImmU1(u1 value)
|
||||
<u8> ImmU8(u8 value)
|
||||
<u32> ImmU32(u32 value)
|
||||
<RegRef> ImmRegRef(Arm::Reg gpr)
|
||||
|
||||
These instructions take a `bool`, `u8` or `u32` value and wrap it up in an IR node so that they can be used
|
||||
by the IR.
|
||||
|
||||
### Context: {Get,Set}Register
|
||||
|
||||
<u32> GetRegister(<RegRef> reg)
|
||||
<void> SetRegister(<RegRef> reg, <u32> value)
|
||||
|
||||
Gets and sets `JitState::Reg[reg]`. Note that `SetRegister(Arm::Reg::R15, _)` is disallowed by IRBuilder.
|
||||
Use `{ALU,BX}WritePC` instead.
|
||||
|
||||
Note that sequences like `SetRegister(R4, _)` followed by `GetRegister(R4)` are
|
||||
optimized away.
|
||||
|
||||
### Context: {Get,Set}{N,Z,C,V}Flag
|
||||
|
||||
<u1> GetNFlag()
|
||||
<void> SetNFlag(<u1> value)
|
||||
<u1> GetZFlag()
|
||||
<void> SetZFlag(<u1> value)
|
||||
<u1> GetCFlag()
|
||||
<void> SetCFlag(<u1> value)
|
||||
<u1> GetVFlag()
|
||||
<void> SetVFlag(<u1> value)
|
||||
|
||||
Gets and sets bits in `JitState::Cpsr`. Similarly to registers, redundant get/sets are optimized away.
|
||||
|
||||
### Context: BXWritePC
|
||||
|
||||
<void> BXWritePC(<u32> value)
|
||||
|
||||
This should probably be the last instruction in a translation block unless you're doing something fancy.
|
||||
|
||||
This microinstruction sets R15 and CPSR.T as appropriate.
|
||||
|
||||
### Callback: CallSupervisor
|
||||
|
||||
<void> CallSupervisor(<u32> svc_imm32)
|
||||
|
||||
This should probably be the last instruction in a translation block unless you're doing something fancy.
|
||||
|
||||
### Calculation: LeastSignificant{Half,Byte}
|
||||
|
||||
<u16> LeastSignificantHalf(<u32> value)
|
||||
<u8> LeastSignificantByte(<u32> value)
|
||||
|
||||
Extract a u16 and u8 respectively from a u32.
|
||||
|
||||
### Calculation: MostSignificantBit, IsZero
|
||||
|
||||
<u1> MostSignificantBit(<u32> value)
|
||||
<u1> IsZero(<u32> value)
|
||||
|
||||
These are used to implement ARM flags N and Z. These can often be optimized away by the backend into a host flag read.
|
||||
|
||||
### Calculation: LogicalShiftLeft
|
||||
|
||||
(<u32> result, <u1> carry_out) LogicalShiftLeft(<u32> operand, <u8> shift_amount, <u1> carry_in)
|
||||
|
||||
Pseudocode:
|
||||
|
||||
if shift_amount == 0:
|
||||
return (operand, carry_in)
|
||||
|
||||
x = operand * (2 ** shift_amount)
|
||||
result = Bits<31,0>(x)
|
||||
carry_out = Bit<32>(x)
|
||||
|
||||
return (result, carry_out)
|
||||
|
||||
This follows ARM semantics. Note `shift_amount` is not masked to 5 bits (like `SHL` does on x64).
|
||||
|
||||
### Calculation: LogicalShiftRight
|
||||
|
||||
(<u32> result, <u1> carry_out) LogicalShiftRight(<u32> operand, <u8> shift_amount, <u1> carry_in)
|
||||
|
||||
Pseudocode:
|
||||
|
||||
if shift_amount == 0:
|
||||
return (operand, carry_in)
|
||||
|
||||
x = ZeroExtend(operand, from_size: 32, to_size: shift_amount+32)
|
||||
result = Bits<shift_amount+31,shift_amount>(x)
|
||||
carry_out = Bit<shift_amount-1>(x)
|
||||
|
||||
return (result, carry_out)
|
||||
|
||||
This follows ARM semantics. Note `shift_amount` is not masked to 5 bits (like `SHR` does on x64).
|
||||
|
||||
### Calculation: ArithmeticShiftRight
|
||||
|
||||
(<u32> result, <u1> carry_out) ArithmeticShiftRight(<u32> operand, <u8> shift_amount, <u1> carry_in)
|
||||
|
||||
Pseudocode:
|
||||
|
||||
if shift_amount == 0:
|
||||
return (operand, carry_in)
|
||||
|
||||
x = SignExtend(operand, from_size: 32, to_size: shift_amount+32)
|
||||
result = Bits<shift_amount+31,shift_amount>(x)
|
||||
carry_out = Bit<shift_amount-1>(x)
|
||||
|
||||
return (result, carry_out)
|
||||
|
||||
This follows ARM semantics. Note `shift_amount` is not masked to 5 bits (like `SAR` does on x64).
|
||||
|
||||
### Calculation: RotateRight
|
||||
|
||||
(<u32> result, <u1> carry_out) RotateRight(<u32> operand, <u8> shift_amount, <u1> carry_in)
|
||||
|
||||
Pseudocode:
|
||||
|
||||
if shift_amount == 0:
|
||||
return (operand, carry_in)
|
||||
|
||||
shift_amount %= 32
|
||||
result = (operand >> shift_amount) | (operand << (32 - shift_amount))
|
||||
carry_out = Bit<31>(result)
|
||||
|
||||
return (result, carry_out)
|
||||
|
||||
### Calculation: AddWithCarry
|
||||
|
||||
(<u32> result, <u1> carry_out, <u1> overflow) AddWithCarry(<u32> a, <u32> b, <u1> carry_in)
|
||||
|
||||
a + b + carry_in
|
||||
|
||||
### Calculation: SubWithCarry
|
||||
|
||||
(<u32> result, <u1> carry_out, <u1> overflow) SubWithCarry(<u32> a, <u32> b, <u1> carry_in)
|
||||
|
||||
This has equivalent semantics to `AddWithCarry(a, Not(b), carry_in)`.
|
||||
|
||||
a - b - !carry_in
|
||||
|
||||
### Calculation: And
|
||||
|
||||
<u32> And(<u32> a, <u32> b)
|
||||
|
||||
### Calculation: Eor
|
||||
|
||||
<u32> Eor(<u32> a, <u32> b)
|
||||
|
||||
Exclusive OR (i.e.: XOR)
|
||||
|
||||
### Calculation: Or
|
||||
|
||||
<u32> Or(<u32> a, <u32> b)
|
||||
|
||||
### Calculation: Not
|
||||
|
||||
<u32> Not(<u32> value)
|
||||
|
||||
### Callback: {Read,Write}Memory{8,16,32,64}
|
||||
|
||||
<u8> ReadMemory8(<u32> vaddr)
|
||||
<u16> ReadMemory16(<u32> vaddr)
|
||||
<u32> ReadMemory32(<u32> vaddr)
|
||||
<u64> ReadMemory64(<u32> vaddr)
|
||||
<void> WriteMemory8(<u32> vaddr, <u8> value_to_store)
|
||||
<void> WriteMemory16(<u32> vaddr, <u16> value_to_store)
|
||||
<void> WriteMemory32(<u32> vaddr, <u32> value_to_store)
|
||||
<void> WriteMemory64(<u32> vaddr, <u64> value_to_store)
|
||||
|
||||
Memory access.
|
||||
|
||||
### Terminal: Interpret
|
||||
|
||||
SetTerm(IR::Term::Interpret{next})
|
||||
|
||||
This terminal instruction calls the interpreter, starting at `next`.
|
||||
The interpreter must interpret exactly one instruction.
|
||||
|
||||
### Terminal: ReturnToDispatch
|
||||
|
||||
SetTerm(IR::Term::ReturnToDispatch{})
|
||||
|
||||
This terminal instruction returns control to the dispatcher.
|
||||
The dispatcher will use the value in R15 to determine what comes next.
|
||||
|
||||
### Terminal: LinkBlock
|
||||
|
||||
SetTerm(IR::Term::LinkBlock{next})
|
||||
|
||||
This terminal instruction jumps to the basic block described by `next` if we have enough
|
||||
cycles remaining. If we do not have enough cycles remaining, we return to the
|
||||
dispatcher, which will return control to the host.
|
||||
|
||||
### Terminal: PopRSBHint
|
||||
|
||||
SetTerm(IR::Term::PopRSBHint{})
|
||||
|
||||
This terminal instruction checks the top of the Return Stack Buffer against R15.
|
||||
If RSB lookup fails, control is returned to the dispatcher.
|
||||
This is an optimization for faster function calls. A backend that doesn't support
|
||||
this optimization or doesn't have a RSB may choose to implement this exactly as
|
||||
ReturnToDispatch.
|
||||
|
||||
### Terminal: If
|
||||
|
||||
SetTerm(IR::Term::If{cond, term_then, term_else})
|
||||
|
||||
This terminal instruction conditionally executes one terminal or another depending
|
||||
on the run-time state of the ARM flags.
|
||||
2474
src/dynarmic/docs/Doxyfile
Normal file
2474
src/dynarmic/docs/Doxyfile
Normal file
File diff suppressed because it is too large
Load diff
76
src/dynarmic/docs/RegisterAllocator.md
Normal file
76
src/dynarmic/docs/RegisterAllocator.md
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
# Register Allocation (x64 Backend)
|
||||
|
||||
`HostLoc`s contain values. A `HostLoc` ("host value location") is either a host CPU register or a host spill location.
|
||||
|
||||
Values once set cannot be changed. Values can however be moved by the register allocator between `HostLoc`s. This is
|
||||
handled by the register allocator itself and code that uses the register allocator need not and should not move values
|
||||
between registers.
|
||||
|
||||
The register allocator is based on three concepts: `Use`, `Define` and `Scratch`.
|
||||
|
||||
* `Use`: The use of a value.
|
||||
* `Define`: The definition of a value, this is the only time when a value is set.
|
||||
* `Scratch`: Allocate a register that can be freely modified as one wishes.
|
||||
|
||||
Note that `Use`ing a value decrements its `use_count` by one. When the `use_count` reaches zero the value is discarded and no longer exists.
|
||||
|
||||
The member functions on `RegAlloc` are just a combination of the above concepts.
|
||||
|
||||
### `Scratch`
|
||||
|
||||
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr)
|
||||
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm)
|
||||
|
||||
At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.
|
||||
|
||||
### Pure `Use`
|
||||
|
||||
Xbyak::Reg64 UseGpr(Argument& arg);
|
||||
Xbyak::Xmm UseXmm(Argument& arg);
|
||||
OpArg UseOpArg(Argument& arg);
|
||||
void Use(Argument& arg, HostLoc host_loc);
|
||||
|
||||
At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
|
||||
which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it
|
||||
in an unused XMM register, `UseOpArg` might be in a register or might be a memory location, and `Use` allows
|
||||
you to specify a specific register (GPR or XMM) to use.
|
||||
|
||||
This register **must not** have its value changed.
|
||||
|
||||
### `UseScratch`
|
||||
|
||||
Xbyak::Reg64 UseScratchGpr(Argument& arg);
|
||||
Xbyak::Xmm UseScratchXmm(Argument& arg);
|
||||
void UseScratch(Argument& arg, HostLoc host_loc);
|
||||
|
||||
At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
|
||||
which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it
|
||||
in an unused XMM register, and `UseScratch` allows you to specify a specific register (GPR or XMM) to use.
|
||||
|
||||
The return value is the register allocated to you.
|
||||
|
||||
You are free to modify the value in the register. The register is discarded at the end of the allocation scope.
|
||||
|
||||
### `Define` as register
|
||||
|
||||
A `Define` is the definition of a value. This is the only time when a value may be set.
|
||||
|
||||
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
|
||||
|
||||
By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
|
||||
value to the specified register `reg`.
|
||||
|
||||
### `Define`ing as an alias of a different value
|
||||
|
||||
Adding a `Define` to an existing value.
|
||||
|
||||
void DefineValue(IR::Inst* inst, Argument& arg);
|
||||
|
||||
You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
|
||||
emitted.
|
||||
|
||||
## When to use each?
|
||||
|
||||
* Prefer `Use` to `UseScratch` where possible.
|
||||
* Prefer the `OpArg` variants where possible.
|
||||
* Prefer to **not** use the specific `HostLoc` variants where possible.
|
||||
145
src/dynarmic/docs/ReturnStackBufferOptimization.md
Normal file
145
src/dynarmic/docs/ReturnStackBufferOptimization.md
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
# Return Stack Buffer Optimization (x64 Backend)
|
||||
|
||||
One of the optimizations that dynarmic does is block-linking. Block-linking is done when
|
||||
the destination address of a jump is available at JIT-time. Instead of returning to the
|
||||
dispatcher at the end of a block we can perform block-linking: just jump directly to the
|
||||
next block. This is beneficial because returning to the dispatcher can often be quite
|
||||
expensive.
|
||||
|
||||
What should we do in cases when we can't predict the destination address? The eponymous
|
||||
example is when executing a return statement at the end of a function; the return address
|
||||
is not statically known at compile time.
|
||||
|
||||
We deal with this by using a return stack buffer: When we execute a call instruction,
|
||||
we push our prediction onto the RSB. When we execute a return instruction, we pop a
|
||||
prediction off the RSB. If the prediction is a hit, we immediately jump to the relevant
|
||||
compiled block. Otherwise, we return to the dispatcher.
|
||||
|
||||
This is the essential idea behind this optimization.
|
||||
|
||||
## `UniqueHash`
|
||||
|
||||
One complication dynarmic has is that a compiled block is not uniquely identifiable by
|
||||
the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
|
||||
computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
|
||||
|
||||
u64 LocationDescriptor::UniqueHash() const {
|
||||
// This value MUST BE UNIQUE.
|
||||
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
|
||||
u64 pc_u64 = u64(arm_pc) << 32;
|
||||
u64 fpscr_u64 = u64(fpscr.Value());
|
||||
u64 t_u64 = cpsr.T() ? 1 : 0;
|
||||
u64 e_u64 = cpsr.E() ? 2 : 0;
|
||||
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
|
||||
}
|
||||
|
||||
## Our implementation isn't actually a stack
|
||||
|
||||
Dynarmic's RSB isn't actually a stack. It was implemented as a ring buffer because
|
||||
that showed better performance in tests.
|
||||
|
||||
### RSB Structure
|
||||
|
||||
The RSB is implemented as a ring buffer. `rsb_ptr` is the index of the insertion
|
||||
point. Each element in `rsb_location_descriptors` is a `UniqueHash` and they
|
||||
each correspond to an element in `rsb_codeptrs`. `rsb_codeptrs` contains the
|
||||
host addresses of the corresponding compiled blocks.
|
||||
|
||||
`RSBSize` was chosen by performance testing. Note that this is bigger than the
|
||||
size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
|
||||
showed degraded performance.
|
||||
|
||||
struct JitState {
|
||||
// ...
|
||||
|
||||
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
|
||||
u32 rsb_ptr = 0;
|
||||
std::array<u64, RSBSize> rsb_location_descriptors;
|
||||
std::array<u64, RSBSize> rsb_codeptrs;
|
||||
void ResetRSB();
|
||||
|
||||
// ...
|
||||
};
|
||||
|
||||
### RSB Push
|
||||
|
||||
We insert our prediction at the insertion point iff the RSB doesn't already
|
||||
contain a prediction with the same `UniqueHash`.
|
||||
|
||||
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
ASSERT(inst->GetArg(0).IsImmediate());
|
||||
u64 imm64 = inst->GetArg(0).GetU64();
|
||||
|
||||
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
|
||||
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
|
||||
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
|
||||
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
|
||||
? u64(unique_hash_to_code_ptr[imm64])
|
||||
: u64(code->GetReturnFromRunCodeAddress());
|
||||
|
||||
code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
|
||||
code->add(index_reg, 1);
|
||||
code->and_(index_reg, u32(JitState::RSBSize - 1));
|
||||
|
||||
code->mov(loc_desc_reg, u64(imm64));
|
||||
CodePtr patch_location = code->getCurr<CodePtr>();
|
||||
patch_unique_hash_locations[imm64].emplace_back(patch_location);
|
||||
code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
|
||||
code->EnsurePatchLocationSize(patch_location, 10);
|
||||
|
||||
Xbyak::Label label;
|
||||
for (size_t i = 0; i < JitState::RSBSize; ++i) {
|
||||
code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
|
||||
code->je(label, code->T_SHORT);
|
||||
}
|
||||
|
||||
code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
|
||||
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
|
||||
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
|
||||
code->L(label);
|
||||
}
|
||||
|
||||
In pseudocode:
|
||||
|
||||
for (i := 0 .. RSBSize-1)
|
||||
if (rsb_location_descriptors[i] == imm64)
|
||||
goto label;
|
||||
rsb_ptr++;
|
||||
rsb_ptr %= RSBSize;
|
||||
rsb_location_descriptors[rsb_ptr] = imm64; //< The UniqueHash
|
||||
rsb_codeptrs[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
|
||||
label:
|
||||
|
||||
### RSB Pop
|
||||
|
||||
To check if a prediction is in the RSB, we linearly scan the RSB.
|
||||
|
||||
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
// This calculation has to match up with IREmitter::PushRSB
|
||||
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
|
||||
code->shl(rcx, 32);
|
||||
code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
|
||||
code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
|
||||
code->or_(rbx, rcx);
|
||||
|
||||
code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
|
||||
for (size_t i = 0; i < JitState::RSBSize; ++i) {
|
||||
code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
|
||||
code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
|
||||
}
|
||||
|
||||
code->jmp(rax);
|
||||
}
|
||||
|
||||
In pseudocode:
|
||||
|
||||
rbx := ComputeUniqueHash()
|
||||
rax := ReturnToDispatch
|
||||
for (i := 0 .. RSBSize-1)
|
||||
if (rbx == rsb_location_descriptors[i])
|
||||
rax = rsb_codeptrs[i]
|
||||
goto rax
|
||||
116
src/dynarmic/externals/CMakeLists.txt
vendored
Normal file
116
src/dynarmic/externals/CMakeLists.txt
vendored
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
include(CPM)
|
||||
|
||||
# Always build externals as static libraries, even when dynarmic is built as shared
|
||||
if (BUILD_SHARED_LIBS)
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL ON)
|
||||
endif()
|
||||
|
||||
# Allow options to be shadowed by normal variables when a subproject uses an old CMake policy
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
|
||||
# Disable tests in all externals supporting the standard option name
|
||||
set(BUILD_TESTING OFF)
|
||||
|
||||
# For libraries that already come with a CMakeLists file,
|
||||
# simply add the directory to that file as a subdirectory
|
||||
# to have CMake automatically recognize them.
|
||||
|
||||
# biscuit
|
||||
|
||||
# biscuit is the RISC-V code emitter and is only needed when targeting a
# riscv host. It is fetched through CPM as a source archive; the package must
# be added exactly once, so no add_subdirectory() call is made alongside
# CPMAddPackage (doing both would register the dependency twice).
if ("riscv" IN_LIST ARCHITECTURE)
    CPMAddPackage(
        NAME biscuit
        VERSION 0.9.1
        URL "https://github.com/lioncash/biscuit/archive/76b0be8dae.zip"
        URL_HASH SHA512=47d55ed02d032d6cf3dc107c6c0a9aea686d5f25aefb81d1af91db027b6815bd5add1755505e19d76625feeb17aa2db6cd1668fe0dad2e6a411519bde6ca4489
        CUSTOM_CACHE_KEY "76b0"
    )
endif()
|
||||
|
||||
# catch
|
||||
|
||||
# TODO(crueter): dedup
|
||||
# Catch2 (>= 3.0.1) is only required when the dynarmic tests are enabled, and
# only looked up when no Catch2::Catch2WithMain target is already available.
if (DYNARMIC_TESTS AND NOT TARGET Catch2::Catch2WithMain)
    find_package(Catch2 3.0.1 REQUIRED)
endif()
|
||||
|
||||
# fmt
|
||||
|
||||
if (NOT TARGET fmt::fmt)
|
||||
# fmtlib formatting library
|
||||
set(FMT_INSTALL ON)
|
||||
add_subdirectory(fmt)
|
||||
endif()
|
||||
|
||||
# mcl
|
||||
CPMAddPackage(
|
||||
NAME mcl
|
||||
VERSION 0.1.12
|
||||
URL "https://github.com/azahar-emu/mcl/archive/7b08d83418.zip"
|
||||
URL_HASH SHA512=f943bac39c1879986decad7a442ff4288eaeca4a2907684c7914e115a55ecc43c2782ded85c0835763fe04e40d5c82220ce864423e489e648e408a84f54dc4f3
|
||||
OPTIONS
|
||||
"MCL_INSTALL ON"
|
||||
CUSTOM_CACHE_KEY "7b08"
|
||||
)
|
||||
|
||||
# oaknut
|
||||
|
||||
# if (NOT TARGET merry::oaknut)
|
||||
# if ("arm64" IN_LIST ARCHITECTURE)
|
||||
# add_subdirectory(oaknut)
|
||||
# elseif (DYNARMIC_TESTS)
|
||||
# add_subdirectory(oaknut EXCLUDE_FROM_ALL)
|
||||
# endif()
|
||||
# endif()
|
||||
|
||||
# unordered_dense
|
||||
|
||||
CPMAddPackage(
|
||||
NAME unordered_dense
|
||||
URL "https://github.com/Lizzie841/unordered_dense/archive/e59d30b7b1.zip"
|
||||
URL_HASH SHA512=71eff7bd9ba4b9226967bacd56a8ff000946f8813167cb5664bb01e96fb79e4e220684d824fe9c59c4d1cc98c606f13aff05b7940a1ed8ab3c95d6974ee34fa0
|
||||
FIND_PACKAGE_ARGUMENTS "CONFIG"
|
||||
OPTIONS
|
||||
"UNORDERED_DENSE_INSTALL ON"
|
||||
CUSTOM_CACHE_KEY "e59d"
|
||||
)
|
||||
|
||||
# xbyak
|
||||
# uncomment if in an independent repo
|
||||
|
||||
# if (NOT TARGET xbyak::xbyak)
|
||||
# if ("x86_64" IN_LIST ARCHITECTURE)
|
||||
# add_subdirectory(xbyak)
|
||||
# endif()
|
||||
# endif()
|
||||
|
||||
# zydis
|
||||
|
||||
if ("x86_64" IN_LIST ARCHITECTURE)
|
||||
CPMAddPackage(
|
||||
NAME Zycore
|
||||
URL "https://github.com/zyantific/zycore-c/archive/75a36c45ae.zip"
|
||||
URL_HASH SHA512=15aa399f39713e042c4345bc3175c82f14dca849fde2a21d4f591f62c43e227b70d868d8bb86beb5f4eb68b1d6bd3792cdd638acf89009e787e3d10ee7401924
|
||||
CUSTOM_CACHE_KEY "75a3"
|
||||
)
|
||||
|
||||
CPMAddPackage(
|
||||
NAME Zydis
|
||||
VERSION 4
|
||||
URL "https://github.com/zyantific/zydis/archive/c2d2bab025.zip"
|
||||
URL_HASH SHA512=7b48f213ff7aab2926f8c9c65195959143bebbfb2b9a25051ffd8b8b0f1baf1670d9739781de674577d955925f91ac89376e16b476a03828c84e2fd765d45020
|
||||
OPTIONS
|
||||
"ZYDIS_BUILD_TOOLS OFF"
|
||||
"ZYDIS_BUILD_EXAMPLES OFF"
|
||||
"ZYDIS_BUILD_DOXYGEN OFF"
|
||||
"ZYAN_ZYCORE_PATH ${Zycore_SOURCE_DIR}"
|
||||
"CMAKE_DISABLE_FIND_PACKAGE_Doxygen ON"
|
||||
CUSTOM_CACHE_KEY "c2d2"
|
||||
)
|
||||
endif()
|
||||
447
src/dynarmic/src/dynarmic/CMakeLists.txt
Normal file
447
src/dynarmic/src/dynarmic/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,447 @@
|
|||
include(TargetArchitectureSpecificSources)
|
||||
|
||||
add_library(dynarmic
|
||||
backend/block_range_information.cpp
|
||||
backend/block_range_information.h
|
||||
backend/exception_handler.h
|
||||
common/always_false.h
|
||||
common/assert.cpp
|
||||
common/assert.h
|
||||
common/cast_util.h
|
||||
common/common_types.h
|
||||
common/crypto/aes.cpp
|
||||
common/crypto/aes.h
|
||||
common/crypto/crc32.cpp
|
||||
common/crypto/crc32.h
|
||||
common/crypto/sm4.cpp
|
||||
common/crypto/sm4.h
|
||||
common/fp/fpcr.h
|
||||
common/fp/fpsr.h
|
||||
common/fp/fused.cpp
|
||||
common/fp/fused.h
|
||||
common/fp/info.h
|
||||
common/fp/mantissa_util.h
|
||||
common/fp/op.h
|
||||
common/fp/op/FPCompare.cpp
|
||||
common/fp/op/FPCompare.h
|
||||
common/fp/op/FPConvert.cpp
|
||||
common/fp/op/FPConvert.h
|
||||
common/fp/op/FPMulAdd.cpp
|
||||
common/fp/op/FPMulAdd.h
|
||||
common/fp/op/FPNeg.h
|
||||
common/fp/op/FPRecipEstimate.cpp
|
||||
common/fp/op/FPRecipEstimate.h
|
||||
common/fp/op/FPRecipExponent.cpp
|
||||
common/fp/op/FPRecipExponent.h
|
||||
common/fp/op/FPRecipStepFused.cpp
|
||||
common/fp/op/FPRecipStepFused.h
|
||||
common/fp/op/FPRoundInt.cpp
|
||||
common/fp/op/FPRoundInt.h
|
||||
common/fp/op/FPRSqrtEstimate.cpp
|
||||
common/fp/op/FPRSqrtEstimate.h
|
||||
common/fp/op/FPRSqrtStepFused.cpp
|
||||
common/fp/op/FPRSqrtStepFused.h
|
||||
common/fp/op/FPToFixed.cpp
|
||||
common/fp/op/FPToFixed.h
|
||||
common/fp/process_exception.cpp
|
||||
common/fp/process_exception.h
|
||||
common/fp/process_nan.cpp
|
||||
common/fp/process_nan.h
|
||||
common/fp/rounding_mode.h
|
||||
common/fp/unpacked.cpp
|
||||
common/fp/unpacked.h
|
||||
common/fp/util.h
|
||||
common/llvm_disassemble.cpp
|
||||
common/llvm_disassemble.h
|
||||
common/lut_from_list.h
|
||||
common/math_util.cpp
|
||||
common/math_util.h
|
||||
common/memory_pool.cpp
|
||||
common/memory_pool.h
|
||||
common/safe_ops.h
|
||||
common/spin_lock.h
|
||||
common/string_util.h
|
||||
common/u128.cpp
|
||||
common/u128.h
|
||||
common/variant_util.h
|
||||
frontend/A32/a32_types.cpp
|
||||
frontend/A32/a32_types.h
|
||||
frontend/A64/a64_types.cpp
|
||||
frontend/A64/a64_types.h
|
||||
frontend/decoder/decoder_detail.h
|
||||
frontend/decoder/matcher.h
|
||||
frontend/imm.cpp
|
||||
frontend/imm.h
|
||||
interface/exclusive_monitor.h
|
||||
interface/optimization_flags.h
|
||||
ir/acc_type.h
|
||||
ir/basic_block.cpp
|
||||
ir/basic_block.h
|
||||
ir/cond.h
|
||||
ir/ir_emitter.cpp
|
||||
ir/ir_emitter.h
|
||||
ir/location_descriptor.cpp
|
||||
ir/location_descriptor.h
|
||||
ir/microinstruction.cpp
|
||||
ir/microinstruction.h
|
||||
ir/opcodes.cpp
|
||||
ir/opcodes.h
|
||||
ir/opcodes.inc
|
||||
ir/opt/constant_propagation_pass.cpp
|
||||
ir/opt/dead_code_elimination_pass.cpp
|
||||
ir/opt/identity_removal_pass.cpp
|
||||
ir/opt/ir_matcher.h
|
||||
ir/opt/naming_pass.cpp
|
||||
ir/opt/passes.h
|
||||
ir/opt/polyfill_pass.cpp
|
||||
ir/opt/verification_pass.cpp
|
||||
ir/terminal.h
|
||||
ir/type.cpp
|
||||
ir/type.h
|
||||
ir/value.cpp
|
||||
ir/value.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic PRIVATE
|
||||
frontend/A32/a32_ir_emitter.cpp
|
||||
frontend/A32/a32_ir_emitter.h
|
||||
frontend/A32/a32_location_descriptor.cpp
|
||||
frontend/A32/a32_location_descriptor.h
|
||||
frontend/A32/decoder/arm.h
|
||||
frontend/A32/decoder/arm.inc
|
||||
frontend/A32/decoder/asimd.h
|
||||
frontend/A32/decoder/asimd.inc
|
||||
frontend/A32/decoder/thumb16.h
|
||||
frontend/A32/decoder/thumb16.inc
|
||||
frontend/A32/decoder/thumb32.h
|
||||
frontend/A32/decoder/thumb32.inc
|
||||
frontend/A32/decoder/vfp.h
|
||||
frontend/A32/decoder/vfp.inc
|
||||
frontend/A32/disassembler/disassembler.h
|
||||
frontend/A32/disassembler/disassembler_arm.cpp
|
||||
frontend/A32/disassembler/disassembler_thumb.cpp
|
||||
frontend/A32/FPSCR.h
|
||||
frontend/A32/ITState.h
|
||||
frontend/A32/PSR.h
|
||||
frontend/A32/translate/a32_translate.cpp
|
||||
frontend/A32/translate/a32_translate.h
|
||||
frontend/A32/translate/conditional_state.cpp
|
||||
frontend/A32/translate/conditional_state.h
|
||||
frontend/A32/translate/translate_arm.cpp
|
||||
frontend/A32/translate/translate_thumb.cpp
|
||||
interface/A32/a32.h
|
||||
interface/A32/arch_version.h
|
||||
interface/A32/config.h
|
||||
interface/A32/coprocessor.h
|
||||
interface/A32/coprocessor_util.h
|
||||
interface/A32/disassembler.h
|
||||
ir/opt/a32_constant_memory_reads_pass.cpp
|
||||
ir/opt/a32_get_set_elimination_pass.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic PRIVATE
|
||||
frontend/A64/a64_ir_emitter.cpp
|
||||
frontend/A64/a64_ir_emitter.h
|
||||
frontend/A64/a64_location_descriptor.cpp
|
||||
frontend/A64/a64_location_descriptor.h
|
||||
frontend/A64/decoder/a64.h
|
||||
frontend/A64/decoder/a64.inc
|
||||
frontend/A64/translate/a64_translate.cpp
|
||||
frontend/A64/translate/a64_translate.h
|
||||
interface/A64/a64.h
|
||||
interface/A64/config.h
|
||||
ir/opt/a64_callback_config_pass.cpp
|
||||
ir/opt/a64_get_set_elimination_pass.cpp
|
||||
ir/opt/a64_merge_interpret_blocks.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("x86_64" IN_LIST ARCHITECTURE)
|
||||
target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1)
|
||||
target_link_libraries(dynarmic
|
||||
PRIVATE
|
||||
xbyak::xbyak
|
||||
Zydis
|
||||
)
|
||||
|
||||
target_architecture_specific_sources(dynarmic "x86_64"
|
||||
backend/x64/abi.cpp
|
||||
backend/x64/abi.h
|
||||
backend/x64/block_of_code.cpp
|
||||
backend/x64/block_of_code.h
|
||||
backend/x64/callback.cpp
|
||||
backend/x64/callback.h
|
||||
backend/x64/constant_pool.cpp
|
||||
backend/x64/constant_pool.h
|
||||
backend/x64/constants.h
|
||||
backend/x64/devirtualize.h
|
||||
backend/x64/emit_x64.cpp
|
||||
backend/x64/emit_x64.h
|
||||
backend/x64/emit_x64_aes.cpp
|
||||
backend/x64/emit_x64_crc32.cpp
|
||||
backend/x64/emit_x64_data_processing.cpp
|
||||
backend/x64/emit_x64_floating_point.cpp
|
||||
backend/x64/emit_x64_memory.cpp.inc
|
||||
backend/x64/emit_x64_memory.h
|
||||
backend/x64/emit_x64_packed.cpp
|
||||
backend/x64/emit_x64_saturation.cpp
|
||||
backend/x64/emit_x64_sha.cpp
|
||||
backend/x64/emit_x64_sm4.cpp
|
||||
backend/x64/emit_x64_vector.cpp
|
||||
backend/x64/emit_x64_vector_floating_point.cpp
|
||||
backend/x64/emit_x64_vector_saturation.cpp
|
||||
backend/x64/exclusive_monitor.cpp
|
||||
backend/x64/exclusive_monitor_friend.h
|
||||
backend/x64/host_feature.h
|
||||
backend/x64/hostloc.cpp
|
||||
backend/x64/hostloc.h
|
||||
backend/x64/jitstate_info.h
|
||||
backend/x64/oparg.h
|
||||
backend/x64/perf_map.cpp
|
||||
backend/x64/perf_map.h
|
||||
backend/x64/reg_alloc.cpp
|
||||
backend/x64/reg_alloc.h
|
||||
backend/x64/stack_layout.h
|
||||
backend/x64/verbose_debugging_output.cpp
|
||||
backend/x64/verbose_debugging_output.h
|
||||
common/spin_lock_x64.cpp
|
||||
common/spin_lock_x64.h
|
||||
common/x64_disassemble.cpp
|
||||
common/x64_disassemble.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_architecture_specific_sources(dynarmic "x86_64"
|
||||
backend/x64/a32_emit_x64.cpp
|
||||
backend/x64/a32_emit_x64.h
|
||||
backend/x64/a32_emit_x64_memory.cpp
|
||||
backend/x64/a32_interface.cpp
|
||||
backend/x64/a32_jitstate.cpp
|
||||
backend/x64/a32_jitstate.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_architecture_specific_sources(dynarmic "x86_64"
|
||||
backend/x64/a64_emit_x64.cpp
|
||||
backend/x64/a64_emit_x64.h
|
||||
backend/x64/a64_emit_x64_memory.cpp
|
||||
backend/x64/a64_interface.cpp
|
||||
backend/x64/a64_jitstate.cpp
|
||||
backend/x64/a64_jitstate.h
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if ("arm64" IN_LIST ARCHITECTURE)
|
||||
target_link_libraries(dynarmic PRIVATE merry::oaknut)
|
||||
|
||||
target_architecture_specific_sources(dynarmic "arm64"
|
||||
backend/arm64/a32_jitstate.cpp
|
||||
backend/arm64/a32_jitstate.h
|
||||
backend/arm64/a64_jitstate.h
|
||||
backend/arm64/abi.cpp
|
||||
backend/arm64/abi.h
|
||||
backend/arm64/address_space.cpp
|
||||
backend/arm64/address_space.h
|
||||
backend/arm64/devirtualize.h
|
||||
backend/arm64/emit_arm64.cpp
|
||||
backend/arm64/emit_arm64.h
|
||||
backend/arm64/emit_arm64_a32.cpp
|
||||
backend/arm64/emit_arm64_a32_coprocessor.cpp
|
||||
backend/arm64/emit_arm64_a32_memory.cpp
|
||||
backend/arm64/emit_arm64_a64.cpp
|
||||
backend/arm64/emit_arm64_a64_memory.cpp
|
||||
backend/arm64/emit_arm64_cryptography.cpp
|
||||
backend/arm64/emit_arm64_data_processing.cpp
|
||||
backend/arm64/emit_arm64_floating_point.cpp
|
||||
backend/arm64/emit_arm64_memory.cpp
|
||||
backend/arm64/emit_arm64_memory.h
|
||||
backend/arm64/emit_arm64_packed.cpp
|
||||
backend/arm64/emit_arm64_saturation.cpp
|
||||
backend/arm64/emit_arm64_vector.cpp
|
||||
backend/arm64/emit_arm64_vector_floating_point.cpp
|
||||
backend/arm64/emit_arm64_vector_saturation.cpp
|
||||
backend/arm64/emit_context.h
|
||||
backend/arm64/exclusive_monitor.cpp
|
||||
backend/arm64/fastmem.h
|
||||
backend/arm64/fpsr_manager.cpp
|
||||
backend/arm64/fpsr_manager.h
|
||||
backend/arm64/reg_alloc.cpp
|
||||
backend/arm64/reg_alloc.h
|
||||
backend/arm64/stack_layout.h
|
||||
backend/arm64/verbose_debugging_output.cpp
|
||||
backend/arm64/verbose_debugging_output.h
|
||||
common/spin_lock_arm64.cpp
|
||||
common/spin_lock_arm64.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_architecture_specific_sources(dynarmic "arm64"
|
||||
backend/arm64/a32_address_space.cpp
|
||||
backend/arm64/a32_address_space.h
|
||||
backend/arm64/a32_core.h
|
||||
backend/arm64/a32_interface.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_architecture_specific_sources(dynarmic "arm64"
|
||||
backend/arm64/a64_address_space.cpp
|
||||
backend/arm64/a64_address_space.h
|
||||
backend/arm64/a64_core.h
|
||||
backend/arm64/a64_interface.cpp
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if ("riscv" IN_LIST ARCHITECTURE)
|
||||
target_link_libraries(dynarmic PRIVATE biscuit::biscuit)
|
||||
|
||||
target_sources(dynarmic PRIVATE
|
||||
backend/riscv64/abi.h
|
||||
backend/riscv64/a32_jitstate.cpp
|
||||
backend/riscv64/a32_jitstate.h
|
||||
backend/riscv64/emit_context.h
|
||||
backend/riscv64/emit_riscv64_a32.cpp
|
||||
backend/riscv64/emit_riscv64_a32_coprocessor.cpp
|
||||
backend/riscv64/emit_riscv64_a32_memory.cpp
|
||||
backend/riscv64/emit_riscv64_a64.cpp
|
||||
backend/riscv64/emit_riscv64_a64_memory.cpp
|
||||
backend/riscv64/emit_riscv64_cryptography.cpp
|
||||
backend/riscv64/emit_riscv64_data_processing.cpp
|
||||
backend/riscv64/emit_riscv64_floating_point.cpp
|
||||
backend/riscv64/emit_riscv64_packed.cpp
|
||||
backend/riscv64/emit_riscv64_saturation.cpp
|
||||
backend/riscv64/emit_riscv64_vector_floating_point.cpp
|
||||
backend/riscv64/emit_riscv64_vector_saturation.cpp
|
||||
backend/riscv64/emit_riscv64_vector.cpp
|
||||
backend/riscv64/emit_riscv64.cpp
|
||||
backend/riscv64/emit_riscv64.h
|
||||
backend/riscv64/reg_alloc.cpp
|
||||
backend/riscv64/reg_alloc.h
|
||||
backend/riscv64/stack_layout.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic PRIVATE
|
||||
backend/riscv64/a32_address_space.cpp
|
||||
backend/riscv64/a32_address_space.h
|
||||
backend/riscv64/a32_core.h
|
||||
backend/riscv64/a32_interface.cpp
|
||||
backend/riscv64/code_block.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Select the exception-handler implementation for the host platform.
#   * Windows: dedicated Windows handler.
#   * macOS:   Mach exception handler, which needs MIG-generated stubs; falls
#              back to the generic handler when mach_exc.defs cannot be found.
#   * UNIX:    POSIX handler (additionally links librt on Linux).
#   * other:   generic handler.
if (WIN32)
    target_sources(dynarmic PRIVATE backend/exception_handler_windows.cpp)
elseif (APPLE)
    find_path(MACH_EXC_DEFS_DIR "mach/mach_exc.defs")
    if (NOT MACH_EXC_DEFS_DIR)
        message(WARNING "macOS fastmem disabled: unable to find mach/mach_exc.defs")
        target_sources(dynarmic PRIVATE backend/exception_handler_generic.cpp)
    else()
        message(STATUS "mach/mach_exc.defs location: ${MACH_EXC_DEFS_DIR}")

        # Create the output directories before invoking mig. Multiple COMMANDs
        # in a single execute_process() run concurrently as a pipeline, so a
        # "mkdir -p" placed there is not guaranteed to finish before mig
        # starts writing its output files.
        # NOTE(review): the stubs are generated into the source tree;
        # presumably exception_handler_macos_mig.c includes them from these
        # paths - confirm before relocating them to the binary directory.
        file(MAKE_DIRECTORY
            "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig"
            "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig"
        )

        # Generate the x86_64 Mach exception user/server stubs.
        execute_process(
            COMMAND
                mig
                    -arch x86_64
                    -user "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_user.c"
                    -header "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_user.h"
                    -server "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_server.c"
                    -sheader "${CMAKE_CURRENT_SOURCE_DIR}/backend/x64/mig/mach_exc_server.h"
                    "${MACH_EXC_DEFS_DIR}/mach/mach_exc.defs"
        )

        # Generate the arm64 Mach exception user/server stubs.
        execute_process(
            COMMAND
                mig
                    -arch arm64
                    -user "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig/mach_exc_user.c"
                    -header "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig/mach_exc_user.h"
                    -server "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig/mach_exc_server.c"
                    -sheader "${CMAKE_CURRENT_SOURCE_DIR}/backend/arm64/mig/mach_exc_server.h"
                    "${MACH_EXC_DEFS_DIR}/mach/mach_exc.defs"
        )

        target_sources(dynarmic PRIVATE
            backend/exception_handler_macos.cpp
            backend/exception_handler_macos_mig.c
        )
    endif()
elseif (UNIX)
    if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
        target_link_libraries(dynarmic PRIVATE rt)
    endif()
    target_sources(dynarmic PRIVATE backend/exception_handler_posix.cpp)
else()
    target_sources(dynarmic PRIVATE backend/exception_handler_generic.cpp)
endif()
|
||||
|
||||
include(CreateDirectoryGroups)
|
||||
create_target_directory_groups(dynarmic)
|
||||
|
||||
target_include_directories(dynarmic PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
||||
)
|
||||
set_target_properties(dynarmic PROPERTIES
|
||||
VERSION ${dynarmic_VERSION}
|
||||
SOVERSION ${dynarmic_VERSION_MAJOR}.${dynarmic_VERSION_MINOR}
|
||||
)
|
||||
|
||||
if (TARGET unordered_dense::unordered_dense)
|
||||
# weird quirk of system installs
|
||||
target_link_libraries(dynarmic
|
||||
PRIVATE
|
||||
unordered_dense::unordered_dense
|
||||
)
|
||||
endif()
|
||||
|
||||
target_compile_options(dynarmic PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_link_libraries(dynarmic
|
||||
PRIVATE
|
||||
Boost::boost
|
||||
fmt::fmt
|
||||
merry::mcl
|
||||
)
|
||||
if (DYNARMIC_USE_LLVM)
|
||||
target_include_directories(dynarmic PRIVATE ${LLVM_INCLUDE_DIRS})
|
||||
target_compile_definitions(dynarmic PRIVATE DYNARMIC_USE_LLVM=1 ${LLVM_DEFINITIONS})
|
||||
llvm_config(dynarmic USE_SHARED armdesc armdisassembler aarch64desc aarch64disassembler x86desc x86disassembler)
|
||||
endif()
|
||||
if (DYNARMIC_ENABLE_CPU_FEATURE_DETECTION)
|
||||
target_compile_definitions(dynarmic PRIVATE DYNARMIC_ENABLE_CPU_FEATURE_DETECTION=1)
|
||||
endif()
|
||||
if (DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT)
|
||||
target_compile_definitions(dynarmic PRIVATE DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT=1)
|
||||
endif()
|
||||
if (DYNARMIC_IGNORE_ASSERTS)
|
||||
target_compile_definitions(dynarmic PRIVATE MCL_IGNORE_ASSERTS=1)
|
||||
endif()
|
||||
if (CMAKE_SYSTEM_NAME STREQUAL "Windows")
|
||||
target_compile_definitions(dynarmic PRIVATE FMT_USE_WINDOWS_H=0)
|
||||
endif()
|
||||
target_compile_definitions(dynarmic PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
# Optional precompiled headers for the dynarmic target.
# The IR emitter header is always precompiled; the backend assembler header
# (xbyak on x86_64, oaknut on arm64) is prepended when that backend is built.
# All entries are restricted to C++ translation units via COMPILE_LANGUAGE.
if (DYNARMIC_USE_PRECOMPILED_HEADERS)
    set(PRECOMPILED_HEADERS "$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_CURRENT_SOURCE_DIR}/ir/ir_emitter.h>")
    if ("x86_64" IN_LIST ARCHITECTURE)
        list(PREPEND PRECOMPILED_HEADERS "$<$<COMPILE_LANGUAGE:CXX>:<xbyak/xbyak.h$<ANGLE-R>>")
    endif()
    if ("arm64" IN_LIST ARCHITECTURE)
        list(PREPEND PRECOMPILED_HEADERS "$<$<COMPILE_LANGUAGE:CXX>:<oaknut/oaknut.hpp$<ANGLE-R>>")
    endif()
    target_precompile_headers(dynarmic PRIVATE ${PRECOMPILED_HEADERS})

    # Request template instantiation during PCH generation on the target
    # itself. CMAKE_PCH_INSTANTIATE_TEMPLATES only initialises this property
    # when a target is created, so setting that variable after add_library()
    # would have no effect on dynarmic.
    set_target_properties(dynarmic PROPERTIES PCH_INSTANTIATE_TEMPLATES ON)
endif()
|
||||
424
src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp
Normal file
424
src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp
Normal file
|
|
@ -0,0 +1,424 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_address_space.h"
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/devirtualize.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/stack_layout.h"
|
||||
#include "dynarmic/common/cast_util.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A32/translate/a32_translate.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
template<auto mfp, typename T>
|
||||
static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
const auto info = Devirtualize<mfp>(this_);
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(info.this_ptr);
|
||||
code.l(l_addr);
|
||||
code.dx(info.fn_ptr);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto mfp, typename T>
|
||||
static void* EmitWrappedReadCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
const auto info = Devirtualize<mfp>(this_);
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
constexpr u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Xscratch0);
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
ABI_PushRegisters(code, save_regs, 0);
|
||||
code.LDR(X0, l_this);
|
||||
code.MOV(X1, Xscratch0);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BLR(Xscratch0);
|
||||
code.MOV(Xscratch0, X0);
|
||||
ABI_PopRegisters(code, save_regs, 0);
|
||||
code.RET();
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(info.this_ptr);
|
||||
code.l(l_addr);
|
||||
code.dx(info.fn_ptr);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto callback, typename T>
|
||||
static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A32::UserConfig& conf) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr) -> T {
|
||||
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
|
||||
return (conf.callbacks->*callback)(vaddr);
|
||||
});
|
||||
};
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(&conf));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto mfp, typename T>
|
||||
static void* EmitWrappedWriteCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
const auto info = Devirtualize<mfp>(this_);
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
constexpr u64 save_regs = ABI_CALLER_SAVE;
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
ABI_PushRegisters(code, save_regs, 0);
|
||||
code.LDR(X0, l_this);
|
||||
code.MOV(X1, Xscratch0);
|
||||
code.MOV(X2, Xscratch1);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BLR(Xscratch0);
|
||||
ABI_PopRegisters(code, save_regs, 0);
|
||||
code.RET();
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(info.this_ptr);
|
||||
code.l(l_addr);
|
||||
code.dx(info.fn_ptr);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto callback, typename T>
|
||||
static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A32::UserConfig& conf) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr, T value) -> u32 {
|
||||
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
|
||||
[&](T expected) -> bool {
|
||||
return (conf.callbacks->*callback)(vaddr, value, expected);
|
||||
})
|
||||
? 0
|
||||
: 1;
|
||||
};
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(&conf));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
|
||||
: AddressSpace(conf.code_cache_size)
|
||||
, conf(conf) {
|
||||
EmitPrelude();
|
||||
}
|
||||
|
||||
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
|
||||
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
|
||||
|
||||
Optimization::PolyfillPass(ir_block, {});
|
||||
Optimization::NamingPass(ir_block);
|
||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
||||
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
|
||||
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
Optimization::VerificationPass(ir_block);
|
||||
|
||||
return ir_block;
|
||||
}
|
||||
|
||||
void A32AddressSpace::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
|
||||
InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
|
||||
}
|
||||
|
||||
void A32AddressSpace::EmitPrelude() {
|
||||
using namespace oaknut::util;
|
||||
|
||||
UnprotectCodeMemory();
|
||||
|
||||
prelude_info.read_memory_8 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead8>(code, conf.callbacks);
|
||||
prelude_info.read_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead16>(code, conf.callbacks);
|
||||
prelude_info.read_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead32>(code, conf.callbacks);
|
||||
prelude_info.read_memory_64 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead64>(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_8 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead8>(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_16 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead16>(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_32 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead32>(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_64 = EmitWrappedReadCallTrampoline<&A32::UserCallbacks::MemoryRead64>(code, conf.callbacks);
|
||||
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead8, u8>(code, conf);
|
||||
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead16, u16>(code, conf);
|
||||
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead32, u32>(code, conf);
|
||||
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead64, u64>(code, conf);
|
||||
prelude_info.write_memory_8 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
|
||||
prelude_info.write_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
|
||||
prelude_info.write_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
|
||||
prelude_info.write_memory_64 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_8 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_16 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_32 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_64 = EmitWrappedWriteCallTrampoline<&A32::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
|
||||
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
|
||||
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
|
||||
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
|
||||
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
|
||||
prelude_info.call_svc = EmitCallTrampoline<&A32::UserCallbacks::CallSVC>(code, conf.callbacks);
|
||||
prelude_info.exception_raised = EmitCallTrampoline<&A32::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
|
||||
prelude_info.isb_raised = EmitCallTrampoline<&A32::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
|
||||
prelude_info.add_ticks = EmitCallTrampoline<&A32::UserCallbacks::AddTicks>(code, conf.callbacks);
|
||||
prelude_info.get_ticks_remaining = EmitCallTrampoline<&A32::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
|
||||
|
||||
oaknut::Label return_from_run_code, l_return_to_dispatcher;
|
||||
|
||||
prelude_info.run_code = code.xptr<PreludeInfo::RunCodeFuncType>();
|
||||
{
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(X19, X0);
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
if (conf.page_table) {
|
||||
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
|
||||
}
|
||||
if (conf.fastmem_pointer) {
|
||||
code.MOV(Xfastmem, *conf.fastmem_pointer);
|
||||
}
|
||||
|
||||
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
|
||||
code.LDR(Xscratch0, l_return_to_dispatcher);
|
||||
for (size_t i = 0; i < RSBCount; i++) {
|
||||
code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
|
||||
}
|
||||
}
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.BL(prelude_info.get_ticks_remaining);
|
||||
code.MOV(Xticks, X0);
|
||||
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
|
||||
}
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.AND(Wscratch0, Wscratch0, 0xffff0000);
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
code.LDAR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
|
||||
code.BR(X19);
|
||||
}
|
||||
|
||||
prelude_info.step_code = code.xptr<PreludeInfo::RunCodeFuncType>();
|
||||
{
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(X19, X0);
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
if (conf.page_table) {
|
||||
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
|
||||
}
|
||||
if (conf.fastmem_pointer) {
|
||||
code.MOV(Xfastmem, *conf.fastmem_pointer);
|
||||
}
|
||||
|
||||
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
|
||||
code.LDR(Xscratch0, l_return_to_dispatcher);
|
||||
for (size_t i = 0; i < RSBCount; i++) {
|
||||
code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
|
||||
}
|
||||
}
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.MOV(Xticks, 1);
|
||||
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
|
||||
}
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.AND(Wscratch0, Wscratch0, 0xffff0000);
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label step_hr_loop;
|
||||
code.l(step_hr_loop);
|
||||
code.LDAXR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
|
||||
code.STLXR(Wscratch1, Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch1, step_hr_loop);
|
||||
|
||||
code.BR(X19);
|
||||
}
|
||||
|
||||
prelude_info.return_to_dispatcher = code.xptr<void*>();
|
||||
{
|
||||
oaknut::Label l_this, l_addr;
|
||||
|
||||
code.LDAR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.CMP(Xticks, 0);
|
||||
code.B(LE, return_from_run_code);
|
||||
}
|
||||
|
||||
code.LDR(X0, l_this);
|
||||
code.MOV(X1, Xstate);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BLR(Xscratch0);
|
||||
code.BR(X0);
|
||||
|
||||
const auto fn = [](A32AddressSpace& self, A32JitState& context) -> CodePtr {
|
||||
return self.GetOrEmit(context.GetLocationDescriptor());
|
||||
};
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(this));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
}
|
||||
|
||||
prelude_info.return_from_run_code = code.xptr<void*>();
|
||||
{
|
||||
code.l(return_from_run_code);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
code.BL(prelude_info.add_ticks);
|
||||
}
|
||||
|
||||
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label exit_hr_loop;
|
||||
code.l(exit_hr_loop);
|
||||
code.LDAXR(W0, Xhalt);
|
||||
code.STLXR(Wscratch0, WZR, Xhalt);
|
||||
code.CBNZ(Wscratch0, exit_hr_loop);
|
||||
|
||||
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
code.RET();
|
||||
}
|
||||
|
||||
code.align(8);
|
||||
code.l(l_return_to_dispatcher);
|
||||
code.dx(mcl::bit_cast<u64>(prelude_info.return_to_dispatcher));
|
||||
|
||||
prelude_info.end_of_prelude = code.offset();
|
||||
|
||||
mem.invalidate_all();
|
||||
ProtectCodeMemory();
|
||||
}
|
||||
|
||||
/// Builds the emitter configuration for A32 code generation from the user
/// configuration.
///
/// Unsafe optimisations are masked out unless `unsafe_optimizations` is set.
/// The A64-only system register fields are left value-initialised, and both
/// the page table and fastmem are set to a 32-bit address space with silent
/// mirroring.
EmitConfig A32AddressSpace::GetEmitConfig() {
    const auto enabled_optimizations = conf.unsafe_optimizations
                                         ? conf.optimizations
                                         : conf.optimizations & all_safe_optimizations;

    // Extracts the FPCR value encoded in an A32 location descriptor.
    const auto fpcr_from_descriptor = [](const IR::LocationDescriptor& location) {
        return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()};
    };

    return EmitConfig{
        .optimizations = enabled_optimizations,

        .hook_isb = conf.hook_isb,

        .cntfreq_el0{},
        .ctr_el0{},
        .dczid_el0{},
        .tpidrro_el0{},
        .tpidr_el0{},

        .check_halt_on_memory_access = conf.check_halt_on_memory_access,

        .page_table_pointer = mcl::bit_cast<u64>(conf.page_table),
        .page_table_address_space_bits = 32,
        .page_table_pointer_mask_bits = conf.page_table_pointer_mask_bits,
        .silently_mirror_page_table = true,
        .absolute_offset_page_table = conf.absolute_offset_page_table,
        .detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
        .only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,

        .fastmem_pointer = conf.fastmem_pointer,
        .recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
        .fastmem_address_space_bits = 32,
        .silently_mirror_fastmem = true,

        .wall_clock_cntpct = conf.wall_clock_cntpct,
        .enable_cycle_counting = conf.enable_cycle_counting,

        .always_little_endian = conf.always_little_endian,

        .descriptor_to_fpcr = fpcr_from_descriptor,
        .emit_cond = EmitA32Cond,
        .emit_condition_failed_terminal = EmitA32ConditionFailedTerminal,
        .emit_terminal = EmitA32Terminal,
        .emit_check_memory_abort = EmitA32CheckMemoryAbort,

        .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
        .state_fpsr_offset = offsetof(A32JitState, fpsr),
        .state_exclusive_state_offset = offsetof(A32JitState, exclusive_state),

        .coprocessors = conf.coprocessors,

        .very_verbose_debugging_output = conf.very_verbose_debugging_output,
    };
}
|
||||
|
||||
/// Records the guest address range covered by a newly emitted block so that
/// later range invalidations can find it. The emitted-block info argument is
/// unused.
void A32AddressSpace::RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo&) {
    const A32::LocationDescriptor start{block.Location()};
    const A32::LocationDescriptor end{block.EndLocation()};

    // The interval is closed, so its upper bound is one below the end PC.
    const auto covered = boost::icl::discrete_interval<u32>::closed(start.PC(), end.PC() - 1);

    block_ranges.AddRange(covered, start);
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
35
src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.h
Normal file
35
src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.h
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/backend/arm64/address_space.h"
|
||||
#include "dynarmic/backend/block_range_information.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct EmittedBlockInfo;
|
||||
|
||||
class A32AddressSpace final : public AddressSpace {
|
||||
public:
|
||||
explicit A32AddressSpace(const A32::UserConfig& conf);
|
||||
|
||||
IR::Block GenerateIR(IR::LocationDescriptor) const override;
|
||||
|
||||
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
|
||||
|
||||
protected:
|
||||
friend class A32Core;
|
||||
|
||||
void EmitPrelude();
|
||||
EmitConfig GetEmitConfig() override;
|
||||
void RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo& block_info) override;
|
||||
|
||||
const A32::UserConfig conf;
|
||||
BlockRangeInformation<u32> block_ranges;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
30
src/dynarmic/src/dynarmic/backend/arm64/a32_core.h
Normal file
30
src/dynarmic/src/dynarmic/backend/arm64/a32_core.h
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_address_space.h"
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class A32Core final {
|
||||
public:
|
||||
explicit A32Core(const A32::UserConfig&) {}
|
||||
|
||||
HaltReason Run(A32AddressSpace& process, A32JitState& thread_ctx, volatile u32* halt_reason) {
|
||||
const auto location_descriptor = thread_ctx.GetLocationDescriptor();
|
||||
const auto entry_point = process.GetOrEmit(location_descriptor);
|
||||
return process.prelude_info.run_code(entry_point, &thread_ctx, halt_reason);
|
||||
}
|
||||
|
||||
HaltReason Step(A32AddressSpace& process, A32JitState& thread_ctx, volatile u32* halt_reason) {
|
||||
const auto location_descriptor = A32::LocationDescriptor{thread_ctx.GetLocationDescriptor()}.SetSingleStepping(true);
|
||||
const auto entry_point = process.GetOrEmit(location_descriptor);
|
||||
return process.prelude_info.step_code(entry_point, &thread_ctx, halt_reason);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
242
src/dynarmic/src/dynarmic/backend/arm64/a32_interface.cpp
Normal file
242
src/dynarmic/src/dynarmic/backend/arm64/a32_interface.cpp
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2021 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/scope_exit.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_address_space.h"
|
||||
#include "dynarmic/backend/arm64/a32_core.h"
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/common/atomic.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
|
||||
namespace Dynarmic::A32 {
|
||||
|
||||
using namespace Backend::Arm64;
|
||||
|
||||
struct Jit::Impl final {
|
||||
Impl(Jit* jit_interface, A32::UserConfig conf)
|
||||
: jit_interface(jit_interface)
|
||||
, conf(conf)
|
||||
, current_address_space(conf)
|
||||
, core(conf) {}
|
||||
|
||||
HaltReason Run() {
|
||||
ASSERT(!jit_interface->is_executing);
|
||||
PerformRequestedCacheInvalidation(static_cast<HaltReason>(Atomic::Load(&halt_reason)));
|
||||
|
||||
jit_interface->is_executing = true;
|
||||
SCOPE_EXIT {
|
||||
jit_interface->is_executing = false;
|
||||
};
|
||||
|
||||
HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
|
||||
|
||||
PerformRequestedCacheInvalidation(hr);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
HaltReason Step() {
|
||||
ASSERT(!jit_interface->is_executing);
|
||||
PerformRequestedCacheInvalidation(static_cast<HaltReason>(Atomic::Load(&halt_reason)));
|
||||
|
||||
jit_interface->is_executing = true;
|
||||
SCOPE_EXIT {
|
||||
jit_interface->is_executing = false;
|
||||
};
|
||||
|
||||
HaltReason hr = core.Step(current_address_space, current_state, &halt_reason);
|
||||
|
||||
PerformRequestedCacheInvalidation(hr);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
void ClearCache() {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
invalidate_entire_cache = true;
|
||||
HaltExecution(HaltReason::CacheInvalidation);
|
||||
}
|
||||
|
||||
void InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
|
||||
HaltExecution(HaltReason::CacheInvalidation);
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
current_state = {};
|
||||
}
|
||||
|
||||
void HaltExecution(HaltReason hr) {
|
||||
Atomic::Or(&halt_reason, static_cast<u32>(hr));
|
||||
Atomic::Barrier();
|
||||
}
|
||||
|
||||
void ClearHalt(HaltReason hr) {
|
||||
Atomic::And(&halt_reason, ~static_cast<u32>(hr));
|
||||
Atomic::Barrier();
|
||||
}
|
||||
|
||||
std::array<std::uint32_t, 16>& Regs() {
|
||||
return current_state.regs;
|
||||
}
|
||||
|
||||
const std::array<std::uint32_t, 16>& Regs() const {
|
||||
return current_state.regs;
|
||||
}
|
||||
|
||||
std::array<std::uint32_t, 64>& ExtRegs() {
|
||||
return current_state.ext_regs;
|
||||
}
|
||||
|
||||
const std::array<std::uint32_t, 64>& ExtRegs() const {
|
||||
return current_state.ext_regs;
|
||||
}
|
||||
|
||||
std::uint32_t Cpsr() const {
|
||||
return current_state.Cpsr();
|
||||
}
|
||||
|
||||
void SetCpsr(std::uint32_t value) {
|
||||
current_state.SetCpsr(value);
|
||||
}
|
||||
|
||||
std::uint32_t Fpscr() const {
|
||||
return current_state.Fpscr();
|
||||
}
|
||||
|
||||
void SetFpscr(std::uint32_t value) {
|
||||
current_state.SetFpscr(value);
|
||||
}
|
||||
|
||||
void ClearExclusiveState() {
|
||||
current_state.exclusive_state = false;
|
||||
}
|
||||
|
||||
void DumpDisassembly() const {
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
private:
|
||||
void PerformRequestedCacheInvalidation(HaltReason hr) {
|
||||
if (Has(hr, HaltReason::CacheInvalidation)) {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
|
||||
ClearHalt(HaltReason::CacheInvalidation);
|
||||
|
||||
if (invalidate_entire_cache) {
|
||||
current_address_space.ClearCache();
|
||||
|
||||
invalidate_entire_cache = false;
|
||||
invalid_cache_ranges.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!invalid_cache_ranges.empty()) {
|
||||
current_address_space.InvalidateCacheRanges(invalid_cache_ranges);
|
||||
|
||||
invalid_cache_ranges.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Jit* jit_interface;
|
||||
A32::UserConfig conf;
|
||||
A32JitState current_state{};
|
||||
A32AddressSpace current_address_space;
|
||||
A32Core core;
|
||||
|
||||
volatile u32 halt_reason = 0;
|
||||
|
||||
std::mutex invalidation_mutex;
|
||||
boost::icl::interval_set<u32> invalid_cache_ranges;
|
||||
bool invalidate_entire_cache = false;
|
||||
};
|
||||
|
||||
Jit::Jit(UserConfig conf)
|
||||
: impl(std::make_unique<Impl>(this, conf)) {}
|
||||
|
||||
Jit::~Jit() = default;
|
||||
|
||||
HaltReason Jit::Run() {
|
||||
return impl->Run();
|
||||
}
|
||||
|
||||
HaltReason Jit::Step() {
|
||||
return impl->Step();
|
||||
}
|
||||
|
||||
void Jit::ClearCache() {
|
||||
impl->ClearCache();
|
||||
}
|
||||
|
||||
void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
|
||||
impl->InvalidateCacheRange(start_address, length);
|
||||
}
|
||||
|
||||
void Jit::Reset() {
|
||||
impl->Reset();
|
||||
}
|
||||
|
||||
void Jit::HaltExecution(HaltReason hr) {
|
||||
impl->HaltExecution(hr);
|
||||
}
|
||||
|
||||
void Jit::ClearHalt(HaltReason hr) {
|
||||
impl->ClearHalt(hr);
|
||||
}
|
||||
|
||||
std::array<std::uint32_t, 16>& Jit::Regs() {
|
||||
return impl->Regs();
|
||||
}
|
||||
|
||||
const std::array<std::uint32_t, 16>& Jit::Regs() const {
|
||||
return impl->Regs();
|
||||
}
|
||||
|
||||
std::array<std::uint32_t, 64>& Jit::ExtRegs() {
|
||||
return impl->ExtRegs();
|
||||
}
|
||||
|
||||
const std::array<std::uint32_t, 64>& Jit::ExtRegs() const {
|
||||
return impl->ExtRegs();
|
||||
}
|
||||
|
||||
std::uint32_t Jit::Cpsr() const {
|
||||
return impl->Cpsr();
|
||||
}
|
||||
|
||||
void Jit::SetCpsr(std::uint32_t value) {
|
||||
impl->SetCpsr(value);
|
||||
}
|
||||
|
||||
std::uint32_t Jit::Fpscr() const {
|
||||
return impl->Fpscr();
|
||||
}
|
||||
|
||||
void Jit::SetFpscr(std::uint32_t value) {
|
||||
impl->SetFpscr(value);
|
||||
}
|
||||
|
||||
void Jit::ClearExclusiveState() {
|
||||
impl->ClearExclusiveState();
|
||||
}
|
||||
|
||||
void Jit::DumpDisassembly() const {
|
||||
impl->DumpDisassembly();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
77
src/dynarmic/src/dynarmic/backend/arm64/a32_jitstate.cpp
Normal file
77
src/dynarmic/src/dynarmic/backend/arm64/a32_jitstate.cpp
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
|
||||
#include <mcl/bit/bit_field.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
/// Reassembles the guest CPSR value from the separately stored pieces.
u32 A32JitState::Cpsr() const {
    // NZCV, Q and the remaining ("jaifm") bits are ORed in unchanged.
    u32 result = cpsr_nzcv | cpsr_q | cpsr_jaifm;

    // GE flags: the top bit of each byte of cpsr_ge maps to CPSR bits 19..16.
    if (mcl::bit::get_bit<31>(cpsr_ge)) {
        result |= 1u << 19;
    }
    if (mcl::bit::get_bit<23>(cpsr_ge)) {
        result |= 1u << 18;
    }
    if (mcl::bit::get_bit<15>(cpsr_ge)) {
        result |= 1u << 17;
    }
    if (mcl::bit::get_bit<7>(cpsr_ge)) {
        result |= 1u << 16;
    }

    // E flag (descriptor bit 1 -> CPSR bit 9), T flag (descriptor bit 0 -> CPSR bit 5).
    if (mcl::bit::get_bit<1>(upper_location_descriptor)) {
        result |= 1u << 9;
    }
    if (mcl::bit::get_bit<0>(upper_location_descriptor)) {
        result |= 1u << 5;
    }

    // IT state: descriptor bits 15..10 stay in place, bits 9..8 move up to 26..25.
    result |= upper_location_descriptor & 0b11111100'00000000;
    result |= (upper_location_descriptor & 0b00000011'00000000) << 17;

    return result;
}
|
||||
|
||||
/// Splits a guest CPSR value into the separately stored pieces. The upper
/// 16 bits of upper_location_descriptor are left untouched.
void A32JitState::SetCpsr(u32 cpsr) {
    // NZCV flags and Q flag keep their CPSR bit positions.
    cpsr_nzcv = cpsr & 0xF0000000;
    cpsr_q = cpsr & (1 << 27);

    // GE flags: each of CPSR bits 19..16 expands to a full byte.
    u32 ge_bytes = 0;
    if (mcl::bit::get_bit<19>(cpsr)) {
        ge_bytes |= 0xFF000000;
    }
    if (mcl::bit::get_bit<18>(cpsr)) {
        ge_bytes |= 0x00FF0000;
    }
    if (mcl::bit::get_bit<17>(cpsr)) {
        ge_bytes |= 0x0000FF00;
    }
    if (mcl::bit::get_bit<16>(cpsr)) {
        ge_bytes |= 0x000000FF;
    }
    cpsr_ge = ge_bytes;

    // Rebuild the low half of the location descriptor.
    u32 descriptor_low = 0;
    // E flag, T flag
    if (mcl::bit::get_bit<9>(cpsr)) {
        descriptor_low |= 2;
    }
    if (mcl::bit::get_bit<5>(cpsr)) {
        descriptor_low |= 1;
    }
    // IT state
    descriptor_low |= cpsr & 0b11111100'00000000;
    descriptor_low |= (cpsr >> 17) & 0b00000011'00000000;

    upper_location_descriptor = (upper_location_descriptor & 0xFFFF0000) | descriptor_low;

    // Other flags
    cpsr_jaifm = cpsr & 0x010001DF;
}
|
||||
|
||||
// FPSCR bits that are kept in the upper half of upper_location_descriptor.
constexpr u32 FPCR_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;

// FPSCR bits that are kept in `fpsr`: bit 27 plus bits 7 and 4..0.
// NOTE(review): presumably QC and the cumulative exception flags; confirm
// against the architecture manual.
constexpr u32 FPSR_MASK = 0x0800'009f;
|
||||
|
||||
/// Reassembles the guest FPSCR from the upper half of the location
/// descriptor, `fpsr` and `fpsr_nzcv`.
u32 A32JitState::Fpscr() const {
    const u32 descriptor_high = upper_location_descriptor & 0xffff'0000;
    return descriptor_high | fpsr | fpsr_nzcv;
}
|
||||
|
||||
/// Splits a guest FPSCR value into `fpsr_nzcv`, `fpsr` and the upper half of
/// the location descriptor. The lower half of the descriptor is preserved.
void A32JitState::SetFpscr(u32 fpscr) {
    const u32 descriptor_low = upper_location_descriptor & 0x0000'ffff;
    const u32 mode_bits = fpscr & FPCR_MASK;

    fpsr_nzcv = fpscr & 0xf000'0000;
    fpsr = fpscr & FPSR_MASK;
    upper_location_descriptor = descriptor_low | mode_bits;
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
48
src/dynarmic/src/dynarmic/backend/arm64/a32_jitstate.h
Normal file
48
src/dynarmic/src/dynarmic/backend/arm64/a32_jitstate.h
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2021 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct A32JitState {
|
||||
u32 cpsr_nzcv = 0;
|
||||
u32 cpsr_q = 0;
|
||||
u32 cpsr_jaifm = 0;
|
||||
u32 cpsr_ge = 0;
|
||||
|
||||
u32 fpsr = 0;
|
||||
u32 fpsr_nzcv = 0;
|
||||
|
||||
std::array<u32, 16> regs{};
|
||||
|
||||
u32 upper_location_descriptor;
|
||||
|
||||
alignas(16) std::array<u32, 64> ext_regs{};
|
||||
|
||||
u32 exclusive_state = 0;
|
||||
|
||||
u32 Cpsr() const;
|
||||
void SetCpsr(u32 cpsr);
|
||||
|
||||
u32 Fpscr() const;
|
||||
void SetFpscr(u32 fpscr);
|
||||
|
||||
IR::LocationDescriptor GetLocationDescriptor() const {
|
||||
return IR::LocationDescriptor{regs[15] | (static_cast<u64>(upper_location_descriptor) << 32)};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
600
src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp
Normal file
600
src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp
Normal file
|
|
@ -0,0 +1,600 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_address_space.h"
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/devirtualize.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/stack_layout.h"
|
||||
#include "dynarmic/common/cast_util.h"
|
||||
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
template<auto mfp, typename T>
|
||||
static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
const auto info = Devirtualize<mfp>(this_);
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(info.this_ptr);
|
||||
code.l(l_addr);
|
||||
code.dx(info.fn_ptr);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto mfp, typename T>
|
||||
static void* EmitWrappedReadCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
const auto info = Devirtualize<mfp>(this_);
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
constexpr u64 save_regs = ABI_CALLER_SAVE & ~ToRegList(Xscratch0);
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
ABI_PushRegisters(code, save_regs, 0);
|
||||
code.LDR(X0, l_this);
|
||||
code.MOV(X1, Xscratch0);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BLR(Xscratch0);
|
||||
code.MOV(Xscratch0, X0);
|
||||
ABI_PopRegisters(code, save_regs, 0);
|
||||
code.RET();
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(info.this_ptr);
|
||||
code.l(l_addr);
|
||||
code.dx(info.fn_ptr);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto callback, typename T>
|
||||
static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr) -> T {
|
||||
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
|
||||
return (conf.callbacks->*callback)(vaddr);
|
||||
});
|
||||
};
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(&conf));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto mfp, typename T>
|
||||
static void* EmitWrappedWriteCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
const auto info = Devirtualize<mfp>(this_);
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
constexpr u64 save_regs = ABI_CALLER_SAVE;
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
ABI_PushRegisters(code, save_regs, 0);
|
||||
code.LDR(X0, l_this);
|
||||
code.MOV(X1, Xscratch0);
|
||||
code.MOV(X2, Xscratch1);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BLR(Xscratch0);
|
||||
ABI_PopRegisters(code, save_regs, 0);
|
||||
code.RET();
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(info.this_ptr);
|
||||
code.l(l_addr);
|
||||
code.dx(info.fn_ptr);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
template<auto callback, typename T>
|
||||
static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
|
||||
using namespace oaknut::util;
|
||||
|
||||
oaknut::Label l_addr, l_this;
|
||||
|
||||
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
|
||||
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
|
||||
[&](T expected) -> bool {
|
||||
return (conf.callbacks->*callback)(vaddr, value, expected);
|
||||
})
|
||||
? 0
|
||||
: 1;
|
||||
};
|
||||
|
||||
void* target = code.xptr<void*>();
|
||||
code.LDR(X0, l_this);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BR(Xscratch0);
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(&conf));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
/// Emits a thunk that calls the MemoryRead128 callback and repacks its
/// result: after the call, X0 is moved into the low half of V0 and X1 into
/// the high half. Registers 29 and 30 are saved around the call.
///
/// Returns the address of the first instruction of the thunk.
static void* EmitRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
    using namespace oaknut::util;

    const auto devirt = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);
    oaknut::Label this_literal, fn_literal;

    constexpr u64 frame_regs = (1ull << 29) | (1ull << 30);

    void* const entry = code.xptr<void*>();

    ABI_PushRegisters(code, frame_regs, 0);
    code.LDR(X0, this_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BLR(Xscratch0);
    // Move the X0:X1 pair into the 128-bit register V0.
    code.FMOV(D0, X0);
    code.FMOV(V0.D()[1], X1);
    ABI_PopRegisters(code, frame_regs, 0);
    code.RET();

    // Literal pool.
    code.align(8);
    code.l(this_literal);
    code.dx(devirt.this_ptr);
    code.l(fn_literal);
    code.dx(devirt.fn_ptr);

    return entry;
}
|
||||
|
||||
/// Emits a register-preserving wrapper around the MemoryRead128 callback.
///
/// The address is taken from Xscratch0 and the 128-bit result is delivered
/// in Q0 (built from the X0:X1 pair left by the call); every other register
/// in ABI_CALLER_SAVE is pushed before and popped after the call.
///
/// Returns the address of the first instruction of the wrapper.
static void* EmitWrappedRead128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
    using namespace oaknut::util;

    const auto devirt = Devirtualize<&A64::UserCallbacks::MemoryRead128>(this_);
    oaknut::Label this_literal, fn_literal;

    // Q0 carries the result out, so it must not be restored.
    constexpr u64 preserved = ABI_CALLER_SAVE & ~ToRegList(Q0);

    void* const entry = code.xptr<void*>();

    ABI_PushRegisters(code, preserved, 0);
    code.LDR(X0, this_literal);
    code.MOV(X1, Xscratch0);
    code.LDR(Xscratch0, fn_literal);
    code.BLR(Xscratch0);
    code.FMOV(D0, X0);
    code.FMOV(V0.D()[1], X1);
    ABI_PopRegisters(code, preserved, 0);
    code.RET();

    // Literal pool.
    code.align(8);
    code.l(this_literal);
    code.dx(devirt.this_ptr);
    code.l(fn_literal);
    code.dx(devirt.fn_ptr);

    return entry;
}
|
||||
|
||||
/// Emits a trampoline for a 128-bit exclusive read. The trampoline calls a
/// capture-less helper that performs the read through
/// `conf.global_monitor->ReadAndMark` and returns the value in Q0.
///
/// The address of `conf` is embedded in the generated code, so `conf` must
/// outlive the emitted trampoline.
///
/// @param code  Code generator the trampoline is appended to.
/// @param conf  User configuration providing the monitor, processor id and callbacks.
/// @return Address of the first trampoline instruction.
static void* EmitExclusiveRead128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
    using namespace oaknut::util;

    // Capture-less so it can be converted to a plain function pointer.
    auto marked_read = [](const A64::UserConfig& cfg, A64::VAddr vaddr) -> Vector {
        return cfg.global_monitor->ReadAndMark<Vector>(cfg.processor_id, vaddr, [&]() -> Vector {
            return cfg.callbacks->MemoryRead128(vaddr);
        });
    };

    // Register-list bits 29 and 30, i.e. X29 and X30, are preserved around the call.
    constexpr auto frame_regs = (1ull << 29) | (1ull << 30);

    oaknut::Label conf_literal, fn_literal;

    void* const entry = code.xptr<void*>();
    ABI_PushRegisters(code, frame_regs, 0);
    code.LDR(X0, conf_literal);
    code.LDR(Xscratch0, fn_literal);
    code.BLR(Xscratch0);
    // Repack the X0:X1 return value into V0.
    code.FMOV(D0, X0);
    code.FMOV(V0.D()[1], X1);
    ABI_PopRegisters(code, frame_regs, 0);
    code.RET();

    // Literal pool: &conf followed by the helper's address.
    code.align(8);
    code.l(conf_literal);
    code.dx(mcl::bit_cast<u64>(&conf));
    code.l(fn_literal);
    code.dx(mcl::bit_cast<u64>(Common::FptrCast(marked_read)));

    return entry;
}
|
||||
|
||||
/// Emits a trampoline that tail-calls the devirtualized `MemoryWrite128`
/// callback. The 128-bit value is taken from Q0 and split into X2 (low half)
/// and X3 (high half); X0 receives the callback object.
///
/// Because the callback is entered with `BR`, it returns directly to whoever
/// called the trampoline.
///
/// @param code   Code generator the trampoline is appended to.
/// @param this_  Callback object whose `MemoryWrite128` is devirtualized.
/// @return Address of the first trampoline instruction.
static void* EmitWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
    using namespace oaknut::util;

    const auto callee = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);

    oaknut::Label self_literal, fn_literal;

    void* const entry = code.xptr<void*>();
    code.LDR(X0, self_literal);
    // Unpack V0 into the X2:X3 argument pair.
    code.FMOV(X2, D0);
    code.FMOV(X3, V0.D()[1]);
    code.LDR(Xscratch0, fn_literal);
    code.BR(Xscratch0);

    // Literal pool: callback object followed by the function address.
    code.align(8);
    code.l(self_literal);
    code.dx(callee.this_ptr);
    code.l(fn_literal);
    code.dx(callee.fn_ptr);

    return entry;
}
|
||||
|
||||
/// Emits a "wrapped" `MemoryWrite128` trampoline: all caller-saved registers
/// are preserved, the value held in Xscratch0 is forwarded as the callback's
/// second argument (X1), and the 128-bit value in Q0 is passed in X2:X3.
///
/// @param code   Code generator the trampoline is appended to.
/// @param this_  Callback object whose `MemoryWrite128` is devirtualized.
/// @return Address of the first trampoline instruction.
static void* EmitWrappedWrite128CallTrampoline(oaknut::CodeGenerator& code, A64::UserCallbacks* this_) {
    using namespace oaknut::util;

    const auto callee = Devirtualize<&A64::UserCallbacks::MemoryWrite128>(this_);

    constexpr u64 preserved = ABI_CALLER_SAVE;

    oaknut::Label self_literal, fn_literal;

    void* const entry = code.xptr<void*>();
    ABI_PushRegisters(code, preserved, 0);
    code.LDR(X0, self_literal);
    // Must happen before Xscratch0 is reused for the function pointer below.
    code.MOV(X1, Xscratch0);
    // Unpack V0 into the X2:X3 argument pair.
    code.FMOV(X2, D0);
    code.FMOV(X3, V0.D()[1]);
    code.LDR(Xscratch0, fn_literal);
    code.BLR(Xscratch0);
    ABI_PopRegisters(code, preserved, 0);
    code.RET();

    // Literal pool: callback object followed by the function address.
    code.align(8);
    code.l(self_literal);
    code.dx(callee.this_ptr);
    code.l(fn_literal);
    code.dx(callee.fn_ptr);

    return entry;
}
|
||||
|
||||
/// Emits a trampoline for a 128-bit exclusive write. It tail-calls a
/// capture-less helper that runs `MemoryWriteExclusive128` under
/// `conf.global_monitor->DoExclusiveOperation`; the helper returns 0 when the
/// exclusive operation succeeded and 1 otherwise.
///
/// The address of `conf` is embedded in the generated code, so `conf` must
/// outlive the emitted trampoline.
///
/// @param code  Code generator the trampoline is appended to.
/// @param conf  User configuration providing the monitor, processor id and callbacks.
/// @return Address of the first trampoline instruction.
static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, const A64::UserConfig& conf) {
    using namespace oaknut::util;

    // Capture-less so it can be converted to a plain function pointer.
    auto exclusive_store = [](const A64::UserConfig& cfg, A64::VAddr vaddr, Vector value) -> u32 {
        if (cfg.global_monitor->DoExclusiveOperation<Vector>(cfg.processor_id, vaddr,
                                                             [&](Vector expected) -> bool {
                                                                 return cfg.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
                                                             })) {
            return 0;
        }
        return 1;
    };

    oaknut::Label conf_literal, fn_literal;

    void* const entry = code.xptr<void*>();
    code.LDR(X0, conf_literal);
    // Unpack V0 into the X2:X3 argument pair.
    code.FMOV(X2, D0);
    code.FMOV(X3, V0.D()[1]);
    code.LDR(Xscratch0, fn_literal);
    code.BR(Xscratch0);

    // Literal pool: &conf followed by the helper's address.
    code.align(8);
    code.l(conf_literal);
    code.dx(mcl::bit_cast<u64>(&conf));
    code.l(fn_literal);
    code.dx(mcl::bit_cast<u64>(Common::FptrCast(exclusive_store)));

    return entry;
}
|
||||
|
||||
/// Builds an address space sized by `conf.code_cache_size`, keeps a copy of the
/// configuration, and immediately emits the prelude (trampolines and
/// dispatcher stubs) into the code cache.
A64AddressSpace::A64AddressSpace(const A64::UserConfig& conf)
        : AddressSpace(conf.code_cache_size)
        , conf(conf) {
    EmitPrelude();
}
|
||||
|
||||
/// Translates the guest code at `descriptor` into an IR block and runs the
/// configured optimization pipeline over it.
///
/// Pass order: callback-config, naming, optional get/set elimination (+DCE),
/// optional constant propagation (+DCE), optional interpret-block merging,
/// and finally verification.
///
/// @param descriptor  Location of the block to translate.
/// @return The translated and optimized IR block.
IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
    const auto read_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
    IR::Block block = A64::Translate(A64::LocationDescriptor{descriptor}, read_code,
                                     {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});

    Optimization::A64CallbackConfigPass(block, conf);
    Optimization::NamingPass(block);

    // Get/set elimination is skipped when halts must be checked on memory accesses.
    const bool eliminate_get_set = conf.HasOptimization(OptimizationFlag::GetSetElimination)
                                && !conf.check_halt_on_memory_access;
    if (eliminate_get_set) {
        Optimization::A64GetSetElimination(block);
        Optimization::DeadCodeElimination(block);
    }

    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
        Optimization::ConstantPropagation(block);
        Optimization::DeadCodeElimination(block);
    }

    if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
        Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks);
    }

    Optimization::VerificationPass(block);

    return block;
}
|
||||
|
||||
/// Drops every emitted basic block that `block_ranges` reports as overlapping
/// one of the given guest address ranges.
///
/// @param ranges  Guest address intervals whose translations are stale.
void A64AddressSpace::InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges) {
    // InvalidateRanges yields the affected blocks, which are then removed from the cache.
    InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
}
|
||||
|
||||
void A64AddressSpace::EmitPrelude() {
|
||||
using namespace oaknut::util;
|
||||
|
||||
UnprotectCodeMemory();
|
||||
|
||||
prelude_info.read_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
|
||||
prelude_info.read_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
|
||||
prelude_info.read_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
|
||||
prelude_info.read_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
|
||||
prelude_info.read_memory_128 = EmitRead128CallTrampoline(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_8 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead8>(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_16 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead16>(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_32 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead32>(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_64 = EmitWrappedReadCallTrampoline<&A64::UserCallbacks::MemoryRead64>(code, conf.callbacks);
|
||||
prelude_info.wrapped_read_memory_128 = EmitWrappedRead128CallTrampoline(code, conf.callbacks);
|
||||
prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead8, u8>(code, conf);
|
||||
prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead16, u16>(code, conf);
|
||||
prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead32, u32>(code, conf);
|
||||
prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A64::UserCallbacks::MemoryRead64, u64>(code, conf);
|
||||
prelude_info.exclusive_read_memory_128 = EmitExclusiveRead128CallTrampoline(code, conf);
|
||||
prelude_info.write_memory_8 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
|
||||
prelude_info.write_memory_16 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
|
||||
prelude_info.write_memory_32 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
|
||||
prelude_info.write_memory_64 = EmitCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
|
||||
prelude_info.write_memory_128 = EmitWrite128CallTrampoline(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_8 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_16 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_32 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_64 = EmitWrappedWriteCallTrampoline<&A64::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
|
||||
prelude_info.wrapped_write_memory_128 = EmitWrappedWrite128CallTrampoline(code, conf.callbacks);
|
||||
prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
|
||||
prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
|
||||
prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
|
||||
prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A64::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
|
||||
prelude_info.exclusive_write_memory_128 = EmitExclusiveWrite128CallTrampoline(code, conf);
|
||||
prelude_info.call_svc = EmitCallTrampoline<&A64::UserCallbacks::CallSVC>(code, conf.callbacks);
|
||||
prelude_info.exception_raised = EmitCallTrampoline<&A64::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
|
||||
prelude_info.isb_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
|
||||
prelude_info.ic_raised = EmitCallTrampoline<&A64::UserCallbacks::InstructionCacheOperationRaised>(code, conf.callbacks);
|
||||
prelude_info.dc_raised = EmitCallTrampoline<&A64::UserCallbacks::DataCacheOperationRaised>(code, conf.callbacks);
|
||||
prelude_info.get_cntpct = EmitCallTrampoline<&A64::UserCallbacks::GetCNTPCT>(code, conf.callbacks);
|
||||
prelude_info.add_ticks = EmitCallTrampoline<&A64::UserCallbacks::AddTicks>(code, conf.callbacks);
|
||||
prelude_info.get_ticks_remaining = EmitCallTrampoline<&A64::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
|
||||
|
||||
oaknut::Label return_from_run_code, l_return_to_dispatcher;
|
||||
|
||||
prelude_info.run_code = code.xptr<PreludeInfo::RunCodeFuncType>();
|
||||
{
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(X19, X0);
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
if (conf.page_table) {
|
||||
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
|
||||
}
|
||||
if (conf.fastmem_pointer) {
|
||||
code.MOV(Xfastmem, *conf.fastmem_pointer);
|
||||
}
|
||||
|
||||
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
|
||||
code.LDR(Xscratch0, l_return_to_dispatcher);
|
||||
for (size_t i = 0; i < RSBCount; i++) {
|
||||
code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
|
||||
}
|
||||
}
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.BL(prelude_info.get_ticks_remaining);
|
||||
code.MOV(Xticks, X0);
|
||||
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
|
||||
}
|
||||
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
code.LDAR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
|
||||
code.BR(X19);
|
||||
}
|
||||
|
||||
prelude_info.step_code = code.xptr<PreludeInfo::RunCodeFuncType>();
|
||||
{
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(X19, X0);
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
if (conf.page_table) {
|
||||
code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
|
||||
}
|
||||
if (conf.fastmem_pointer) {
|
||||
code.MOV(Xfastmem, *conf.fastmem_pointer);
|
||||
}
|
||||
|
||||
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
|
||||
code.LDR(Xscratch0, l_return_to_dispatcher);
|
||||
for (size_t i = 0; i < RSBCount; i++) {
|
||||
code.STR(Xscratch0, SP, offsetof(StackLayout, rsb) + offsetof(RSBEntry, code_ptr) + i * sizeof(RSBEntry));
|
||||
}
|
||||
}
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.MOV(Xticks, 1);
|
||||
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
|
||||
}
|
||||
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A64JitState, fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label step_hr_loop;
|
||||
code.l(step_hr_loop);
|
||||
code.LDAXR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
|
||||
code.STLXR(Wscratch1, Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch1, step_hr_loop);
|
||||
|
||||
code.BR(X19);
|
||||
}
|
||||
|
||||
prelude_info.return_to_dispatcher = code.xptr<void*>();
|
||||
{
|
||||
oaknut::Label l_this, l_addr;
|
||||
|
||||
code.LDAR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.CMP(Xticks, 0);
|
||||
code.B(LE, return_from_run_code);
|
||||
}
|
||||
|
||||
code.LDR(X0, l_this);
|
||||
code.MOV(X1, Xstate);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BLR(Xscratch0);
|
||||
code.BR(X0);
|
||||
|
||||
const auto fn = [](A64AddressSpace& self, A64JitState& context) -> CodePtr {
|
||||
return self.GetOrEmit(context.GetLocationDescriptor());
|
||||
};
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(this));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
}
|
||||
|
||||
prelude_info.return_from_run_code = code.xptr<void*>();
|
||||
{
|
||||
code.l(return_from_run_code);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
code.BL(prelude_info.add_ticks);
|
||||
}
|
||||
|
||||
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label exit_hr_loop;
|
||||
code.l(exit_hr_loop);
|
||||
code.LDAXR(W0, Xhalt);
|
||||
code.STLXR(Wscratch0, WZR, Xhalt);
|
||||
code.CBNZ(Wscratch0, exit_hr_loop);
|
||||
|
||||
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
code.RET();
|
||||
}
|
||||
|
||||
code.align(8);
|
||||
code.l(l_return_to_dispatcher);
|
||||
code.dx(mcl::bit_cast<u64>(prelude_info.return_to_dispatcher));
|
||||
|
||||
prelude_info.end_of_prelude = code.offset();
|
||||
|
||||
mem.invalidate_all();
|
||||
ProtectCodeMemory();
|
||||
}
|
||||
|
||||
/// Builds the backend-independent emitter configuration from the A64 user
/// configuration, the A64-specific emit hooks and the `A64JitState` field
/// offsets.
///
/// @return A freshly populated `EmitConfig`.
EmitConfig A64AddressSpace::GetEmitConfig() {
    // Unsafe optimizations are masked out unless the user explicitly opted in.
    const auto effective_optimizations = conf.unsafe_optimizations
                                           ? conf.optimizations
                                           : conf.optimizations & all_safe_optimizations;

    return EmitConfig{
        .optimizations = effective_optimizations,

        .hook_isb = conf.hook_isb,

        // System register values exposed to the guest.
        .cntfreq_el0 = conf.cntfrq_el0,
        .ctr_el0 = conf.ctr_el0,
        .dczid_el0 = conf.dczid_el0,
        .tpidrro_el0 = conf.tpidrro_el0,
        .tpidr_el0 = conf.tpidr_el0,

        .check_halt_on_memory_access = conf.check_halt_on_memory_access,

        // Page-table based memory access.
        .page_table_pointer = mcl::bit_cast<u64>(conf.page_table),
        .page_table_address_space_bits = conf.page_table_address_space_bits,
        .page_table_pointer_mask_bits = conf.page_table_pointer_mask_bits,
        .silently_mirror_page_table = conf.silently_mirror_page_table,
        .absolute_offset_page_table = conf.absolute_offset_page_table,
        .detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
        .only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,

        // Fastmem based memory access.
        .fastmem_pointer = conf.fastmem_pointer,
        .recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
        .fastmem_address_space_bits = conf.fastmem_address_space_bits,
        .silently_mirror_fastmem = conf.silently_mirror_fastmem,

        // Timing.
        .wall_clock_cntpct = conf.wall_clock_cntpct,
        .enable_cycle_counting = conf.enable_cycle_counting,

        .always_little_endian = true,

        // A64-specific emit hooks.
        .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return A64::LocationDescriptor{location}.FPCR(); },
        .emit_cond = EmitA64Cond,
        .emit_condition_failed_terminal = EmitA64ConditionFailedTerminal,
        .emit_terminal = EmitA64Terminal,
        .emit_check_memory_abort = EmitA64CheckMemoryAbort,

        // Where the emitter finds these fields inside A64JitState.
        .state_nzcv_offset = offsetof(A64JitState, cpsr_nzcv),
        .state_fpsr_offset = offsetof(A64JitState, fpsr),
        .state_exclusive_state_offset = offsetof(A64JitState, exclusive_state),

        .coprocessors{},

        .very_verbose_debugging_output = conf.very_verbose_debugging_output,
    };
}
|
||||
|
||||
/// Records the guest PC range covered by a freshly emitted block so that
/// later cache invalidations can find it.
///
/// @param block  The IR block that was just emitted; the second parameter is unused.
void A64AddressSpace::RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo&) {
    const A64::LocationDescriptor start{block.Location()};
    const A64::LocationDescriptor end{block.EndLocation()};

    // Closed interval: the last covered address is one below the end location's PC.
    const auto first_pc = start.PC();
    const auto last_pc = end.PC() - 1;
    block_ranges.AddRange(boost::icl::discrete_interval<u64>::closed(first_pc, last_pc), start);
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
35
src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.h
Normal file
35
src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.h
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/backend/arm64/address_space.h"
|
||||
#include "dynarmic/backend/block_range_information.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct EmittedBlockInfo;
|
||||
|
||||
class A64AddressSpace final : public AddressSpace {
|
||||
public:
|
||||
explicit A64AddressSpace(const A64::UserConfig& conf);
|
||||
|
||||
IR::Block GenerateIR(IR::LocationDescriptor) const override;
|
||||
|
||||
void InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges);
|
||||
|
||||
protected:
|
||||
friend class A64Core;
|
||||
|
||||
void EmitPrelude();
|
||||
EmitConfig GetEmitConfig() override;
|
||||
void RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo& block_info) override;
|
||||
|
||||
const A64::UserConfig conf;
|
||||
BlockRangeInformation<u64> block_ranges;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
30
src/dynarmic/src/dynarmic/backend/arm64/a64_core.h
Normal file
30
src/dynarmic/src/dynarmic/backend/arm64/a64_core.h
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_address_space.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class A64Core final {
|
||||
public:
|
||||
explicit A64Core(const A64::UserConfig&) {}
|
||||
|
||||
HaltReason Run(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
|
||||
const auto location_descriptor = thread_ctx.GetLocationDescriptor();
|
||||
const auto entry_point = process.GetOrEmit(location_descriptor);
|
||||
return process.prelude_info.run_code(entry_point, &thread_ctx, halt_reason);
|
||||
}
|
||||
|
||||
HaltReason Step(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) {
|
||||
const auto location_descriptor = A64::LocationDescriptor{thread_ctx.GetLocationDescriptor()}.SetSingleStepping(true);
|
||||
const auto entry_point = process.GetOrEmit(location_descriptor);
|
||||
return process.prelude_info.step_code(entry_point, &thread_ctx, halt_reason);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
326
src/dynarmic/src/dynarmic/backend/arm64/a64_interface.cpp
Normal file
326
src/dynarmic/src/dynarmic/backend/arm64/a64_interface.cpp
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/scope_exit.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_address_space.h"
|
||||
#include "dynarmic/backend/arm64/a64_core.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/common/atomic.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
|
||||
namespace Dynarmic::A64 {
|
||||
|
||||
using namespace Backend::Arm64;
|
||||
|
||||
struct Jit::Impl final {
|
||||
Impl(Jit*, A64::UserConfig conf)
|
||||
: conf(conf)
|
||||
, current_address_space(conf)
|
||||
, core(conf) {}
|
||||
|
||||
HaltReason Run() {
|
||||
ASSERT(!is_executing);
|
||||
PerformRequestedCacheInvalidation(static_cast<HaltReason>(Atomic::Load(&halt_reason)));
|
||||
|
||||
is_executing = true;
|
||||
SCOPE_EXIT {
|
||||
is_executing = false;
|
||||
};
|
||||
|
||||
HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
|
||||
|
||||
PerformRequestedCacheInvalidation(hr);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
HaltReason Step() {
|
||||
ASSERT(!is_executing);
|
||||
PerformRequestedCacheInvalidation(static_cast<HaltReason>(Atomic::Load(&halt_reason)));
|
||||
|
||||
is_executing = true;
|
||||
SCOPE_EXIT {
|
||||
is_executing = false;
|
||||
};
|
||||
|
||||
HaltReason hr = core.Step(current_address_space, current_state, &halt_reason);
|
||||
|
||||
PerformRequestedCacheInvalidation(hr);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
void ClearCache() {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
invalidate_entire_cache = true;
|
||||
HaltExecution(HaltReason::CacheInvalidation);
|
||||
}
|
||||
|
||||
void InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
invalid_cache_ranges.add(boost::icl::discrete_interval<u64>::closed(start_address, start_address + length - 1));
|
||||
HaltExecution(HaltReason::CacheInvalidation);
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
current_state = {};
|
||||
}
|
||||
|
||||
void HaltExecution(HaltReason hr) {
|
||||
Atomic::Or(&halt_reason, static_cast<u32>(hr));
|
||||
}
|
||||
|
||||
void ClearHalt(HaltReason hr) {
|
||||
Atomic::And(&halt_reason, ~static_cast<u32>(hr));
|
||||
}
|
||||
|
||||
std::uint64_t PC() const {
|
||||
return current_state.pc;
|
||||
}
|
||||
|
||||
void SetPC(std::uint64_t value) {
|
||||
current_state.pc = value;
|
||||
}
|
||||
|
||||
std::uint64_t SP() const {
|
||||
return current_state.sp;
|
||||
}
|
||||
|
||||
void SetSP(std::uint64_t value) {
|
||||
current_state.sp = value;
|
||||
}
|
||||
|
||||
std::array<std::uint64_t, 31>& Regs() {
|
||||
return current_state.reg;
|
||||
}
|
||||
|
||||
const std::array<std::uint64_t, 31>& Regs() const {
|
||||
return current_state.reg;
|
||||
}
|
||||
|
||||
std::array<std::uint64_t, 64>& VecRegs() {
|
||||
return current_state.vec;
|
||||
}
|
||||
|
||||
const std::array<std::uint64_t, 64>& VecRegs() const {
|
||||
return current_state.vec;
|
||||
}
|
||||
|
||||
std::uint32_t Fpcr() const {
|
||||
return current_state.fpcr;
|
||||
}
|
||||
|
||||
void SetFpcr(std::uint32_t value) {
|
||||
current_state.fpcr = value;
|
||||
}
|
||||
|
||||
std::uint32_t Fpsr() const {
|
||||
return current_state.fpsr;
|
||||
}
|
||||
|
||||
void SetFpsr(std::uint32_t value) {
|
||||
current_state.fpsr = value;
|
||||
}
|
||||
|
||||
std::uint32_t Pstate() const {
|
||||
return current_state.cpsr_nzcv;
|
||||
}
|
||||
|
||||
void SetPstate(std::uint32_t value) {
|
||||
current_state.cpsr_nzcv = value;
|
||||
}
|
||||
|
||||
void ClearExclusiveState() {
|
||||
current_state.exclusive_state = false;
|
||||
}
|
||||
|
||||
bool IsExecuting() const {
|
||||
return is_executing;
|
||||
}
|
||||
|
||||
void DumpDisassembly() const {
|
||||
current_address_space.DumpDisassembly();
|
||||
}
|
||||
|
||||
std::vector<std::string> Disassemble() const {
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
private:
|
||||
void PerformRequestedCacheInvalidation(HaltReason hr) {
|
||||
if (Has(hr, HaltReason::CacheInvalidation)) {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
|
||||
ClearHalt(HaltReason::CacheInvalidation);
|
||||
|
||||
if (invalidate_entire_cache) {
|
||||
current_address_space.ClearCache();
|
||||
|
||||
invalidate_entire_cache = false;
|
||||
invalid_cache_ranges.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!invalid_cache_ranges.empty()) {
|
||||
current_address_space.InvalidateCacheRanges(invalid_cache_ranges);
|
||||
|
||||
invalid_cache_ranges.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
A64::UserConfig conf;
|
||||
A64JitState current_state{};
|
||||
A64AddressSpace current_address_space;
|
||||
A64Core core;
|
||||
|
||||
volatile u32 halt_reason = 0;
|
||||
|
||||
std::mutex invalidation_mutex;
|
||||
boost::icl::interval_set<u64> invalid_cache_ranges;
|
||||
bool invalidate_entire_cache = false;
|
||||
bool is_executing = false;
|
||||
};
|
||||
|
||||
/// Creates the JIT and its private implementation from the given configuration.
Jit::Jit(UserConfig conf)
        : impl{std::make_unique<Jit::Impl>(this, conf)} {}

/// Defined out of line, where `Impl` is a complete type.
Jit::~Jit() = default;
|
||||
|
||||
HaltReason Jit::Run() {
|
||||
return impl->Run();
|
||||
}
|
||||
|
||||
HaltReason Jit::Step() {
|
||||
return impl->Step();
|
||||
}
|
||||
|
||||
void Jit::ClearCache() {
|
||||
impl->ClearCache();
|
||||
}
|
||||
|
||||
void Jit::InvalidateCacheRange(std::uint64_t start_address, std::size_t length) {
|
||||
impl->InvalidateCacheRange(start_address, length);
|
||||
}
|
||||
|
||||
void Jit::Reset() {
|
||||
impl->Reset();
|
||||
}
|
||||
|
||||
void Jit::HaltExecution(HaltReason hr) {
|
||||
impl->HaltExecution(hr);
|
||||
}
|
||||
|
||||
void Jit::ClearHalt(HaltReason hr) {
|
||||
impl->ClearHalt(hr);
|
||||
}
|
||||
|
||||
std::uint64_t Jit::GetSP() const {
|
||||
return impl->SP();
|
||||
}
|
||||
|
||||
void Jit::SetSP(std::uint64_t value) {
|
||||
impl->SetSP(value);
|
||||
}
|
||||
|
||||
std::uint64_t Jit::GetPC() const {
|
||||
return impl->PC();
|
||||
}
|
||||
|
||||
void Jit::SetPC(std::uint64_t value) {
|
||||
impl->SetPC(value);
|
||||
}
|
||||
|
||||
std::uint64_t Jit::GetRegister(std::size_t index) const {
|
||||
return impl->Regs()[index];
|
||||
}
|
||||
|
||||
void Jit::SetRegister(size_t index, std::uint64_t value) {
|
||||
impl->Regs()[index] = value;
|
||||
}
|
||||
|
||||
std::array<std::uint64_t, 31> Jit::GetRegisters() const {
|
||||
return impl->Regs();
|
||||
}
|
||||
|
||||
void Jit::SetRegisters(const std::array<std::uint64_t, 31>& value) {
|
||||
impl->Regs() = value;
|
||||
}
|
||||
|
||||
Vector Jit::GetVector(std::size_t index) const {
|
||||
auto& vec = impl->VecRegs();
|
||||
return {vec[index * 2], vec[index * 2 + 1]};
|
||||
}
|
||||
|
||||
void Jit::SetVector(std::size_t index, Vector value) {
|
||||
auto& vec = impl->VecRegs();
|
||||
vec[index * 2] = value[0];
|
||||
vec[index * 2 + 1] = value[1];
|
||||
}
|
||||
|
||||
std::array<Vector, 32> Jit::GetVectors() const {
|
||||
std::array<Vector, 32> ret;
|
||||
std::memcpy(ret.data(), impl->VecRegs().data(), sizeof(ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Jit::SetVectors(const std::array<Vector, 32>& value) {
|
||||
std::memcpy(impl->VecRegs().data(), value.data(), sizeof(value));
|
||||
}
|
||||
|
||||
std::uint32_t Jit::GetFpcr() const {
|
||||
return impl->Fpcr();
|
||||
}
|
||||
|
||||
void Jit::SetFpcr(std::uint32_t value) {
|
||||
impl->SetFpcr(value);
|
||||
}
|
||||
|
||||
std::uint32_t Jit::GetFpsr() const {
|
||||
return impl->Fpsr();
|
||||
}
|
||||
|
||||
void Jit::SetFpsr(std::uint32_t value) {
|
||||
impl->SetFpsr(value);
|
||||
}
|
||||
|
||||
std::uint32_t Jit::GetPstate() const {
|
||||
return impl->Pstate();
|
||||
}
|
||||
|
||||
void Jit::SetPstate(std::uint32_t value) {
|
||||
impl->SetPstate(value);
|
||||
}
|
||||
|
||||
void Jit::ClearExclusiveState() {
|
||||
impl->ClearExclusiveState();
|
||||
}
|
||||
|
||||
bool Jit::IsExecuting() const {
|
||||
return impl->IsExecuting();
|
||||
}
|
||||
|
||||
void Jit::DumpDisassembly() const {
|
||||
impl->DumpDisassembly();
|
||||
}
|
||||
|
||||
std::vector<std::string> Jit::Disassemble() const {
|
||||
return impl->Disassemble();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A64
|
||||
40
src/dynarmic/src/dynarmic/backend/arm64/a64_jitstate.h
Normal file
40
src/dynarmic/src/dynarmic/backend/arm64/a64_jitstate.h
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct A64JitState {
|
||||
std::array<u64, 31> reg{};
|
||||
u64 sp = 0;
|
||||
u64 pc = 0;
|
||||
|
||||
u32 cpsr_nzcv = 0;
|
||||
|
||||
alignas(16) std::array<u64, 64> vec{};
|
||||
|
||||
u32 exclusive_state = 0;
|
||||
|
||||
u32 fpsr = 0;
|
||||
u32 fpcr = 0;
|
||||
|
||||
IR::LocationDescriptor GetLocationDescriptor() const {
|
||||
const u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift;
|
||||
const u64 pc_u64 = pc & A64::LocationDescriptor::pc_mask;
|
||||
return IR::LocationDescriptor{pc_u64 | fpcr_u64};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
94
src/dynarmic/src/dynarmic/backend/arm64/abi.cpp
Normal file
94
src/dynarmic/src/dynarmic/backend/arm64/abi.cpp
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <mcl/bit/bit_field.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
static constexpr size_t gpr_size = 8;
|
||||
static constexpr size_t fpr_size = 16;
|
||||
|
||||
/// Layout of a register save area computed from a register list.
/// Field order matters: the struct is filled by positional aggregate
/// initialization.
struct FrameInfo {
    std::vector<int> gprs;  // indexes of general-purpose registers to save
    std::vector<int> fprs;  // indexes of vector registers to save
    size_t frame_size;      // extra stack bytes reserved below the save area
    size_t gprs_size;       // bytes occupied by the saved GPRs
    size_t fprs_size;       // bytes occupied by the saved vector registers
};
|
||||
|
||||
/// Expands a 32-bit mask into the ascending list of set bit positions.
///
/// @param list  Bitmask; bit i set means index i is included.
/// @return Indexes of all set bits, lowest first.
static std::vector<int> ListToIndexes(u32 list) {
    std::vector<int> set_bits;
    for (int bit = 0; bit < 32; ++bit) {
        const bool is_set = ((list >> bit) & 1u) != 0;
        if (!is_set) {
            continue;
        }
        set_bits.push_back(bit);
    }
    return set_bits;
}
|
||||
|
||||
/// Splits a register list into GPR and vector register indexes and computes
/// the size of each save area.
///
/// @param rl          Register list: low 32 bits select GPRs, high 32 bits select vector registers.
/// @param frame_size  Extra stack bytes to reserve; passed through unchanged.
/// @return The populated frame description.
static FrameInfo CalculateFrameInfo(RegisterList rl, size_t frame_size) {
    const auto gpr_indexes = ListToIndexes(static_cast<u32>(rl));
    const auto fpr_indexes = ListToIndexes(static_cast<u32>(rl >> 32));

    // GPRs are stored in 16-byte pairs; an odd register still takes a full pair slot.
    const size_t gpr_pair_slots = (gpr_indexes.size() + 1) / 2;
    const size_t gpr_bytes = gpr_pair_slots * 16;
    // Each vector register takes 16 bytes.
    const size_t fpr_bytes = fpr_indexes.size() * 16;

    return {
        gpr_indexes,
        fpr_indexes,
        frame_size,
        gpr_bytes,
        fpr_bytes,
    };
}
|
||||
|
||||
/* DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET)
 *
 * Emits the loads or stores for one register class. Expects `code` and
 * `frame_info` to be in scope at the expansion site.
 *   TYPE      - `gpr` or `fpr`; selects frame_info.<TYPE>s and <TYPE>_size
 *   REG_TYPE  - oaknut register type built from each index
 *   PAIR_OP   - instruction used for two registers at a time
 *   SINGLE_OP - instruction used for the leftover register when the count is odd
 *   OFFSET    - byte offset from SP where this register class starts
 * Register number k in the list lives at OFFSET + k * <TYPE>_size.
 * (No `//` comments inside the body: a trailing backslash would swallow the next line.)
 */
#define DO_IT(TYPE, REG_TYPE, PAIR_OP, SINGLE_OP, OFFSET)                                                          \
    if (!frame_info.TYPE##s.empty()) {                                                                             \
        const size_t reg_count = frame_info.TYPE##s.size();                                                        \
        for (size_t slot = 0; slot + 1 < reg_count; slot += 2) {                                                   \
            code.PAIR_OP(oaknut::REG_TYPE{frame_info.TYPE##s[slot]}, oaknut::REG_TYPE{frame_info.TYPE##s[slot + 1]}, SP, (OFFSET) + slot * TYPE##_size); \
        }                                                                                                          \
        if (reg_count % 2 == 1) {                                                                                  \
            const size_t slot = reg_count - 1;                                                                     \
            code.SINGLE_OP(oaknut::REG_TYPE{frame_info.TYPE##s[slot]}, SP, (OFFSET) + slot * TYPE##_size);         \
        }                                                                                                          \
    }
|
||||
|
||||
// Emits code that reserves the register save area, stores every register named
// in `rl` into it, and then reserves `frame_size` further bytes below it.
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
    // DO_IT refers to this variable by name.
    const FrameInfo frame_info = CalculateFrameInfo(rl, frame_size);
    const size_t save_area_size = frame_info.gprs_size + frame_info.fprs_size;

    code.SUB(SP, SP, save_area_size);

    // GPRs sit at the bottom of the save area, FPRs directly above them.
    DO_IT(gpr, XReg, STP, STR, 0)
    DO_IT(fpr, QReg, STP, STR, frame_info.gprs_size)

    code.SUB(SP, SP, frame_info.frame_size);
}
|
||||
|
||||
// Emits the mirror image of ABI_PushRegisters: releases the `frame_size` bytes,
// reloads every register named in `rl`, then releases the save area itself.
void ABI_PopRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t frame_size) {
    // DO_IT refers to this variable by name.
    const FrameInfo frame_info = CalculateFrameInfo(rl, frame_size);
    const size_t save_area_size = frame_info.gprs_size + frame_info.fprs_size;

    code.ADD(SP, SP, frame_info.frame_size);

    DO_IT(gpr, XReg, LDP, LDR, 0)
    DO_IT(fpr, QReg, LDP, LDR, frame_info.gprs_size)

    code.ADD(SP, SP, save_area_size);
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
80
src/dynarmic/src/dynarmic/backend/arm64/abi.h
Normal file
80
src/dynarmic/src/dynarmic/backend/arm64/abi.h
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <initializer_list>
|
||||
#include <stdexcept>
|
||||
#include <type_traits>
|
||||
|
||||
#include <mcl/mp/metavalue/lift_value.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/common/always_false.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
constexpr oaknut::XReg Xstate{28};
|
||||
constexpr oaknut::XReg Xhalt{27};
|
||||
constexpr oaknut::XReg Xticks{26};
|
||||
constexpr oaknut::XReg Xfastmem{25};
|
||||
constexpr oaknut::XReg Xpagetable{24};
|
||||
|
||||
constexpr oaknut::XReg Xscratch0{16}, Xscratch1{17}, Xscratch2{30};
|
||||
constexpr oaknut::WReg Wscratch0{16}, Wscratch1{17}, Wscratch2{30};
|
||||
|
||||
template<size_t bitsize>
|
||||
constexpr auto Rscratch0() {
|
||||
if constexpr (bitsize == 32) {
|
||||
return Wscratch0;
|
||||
} else if constexpr (bitsize == 64) {
|
||||
return Xscratch0;
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<bitsize>>);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
constexpr auto Rscratch1() {
|
||||
if constexpr (bitsize == 32) {
|
||||
return Wscratch1;
|
||||
} else if constexpr (bitsize == 64) {
|
||||
return Xscratch1;
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<bitsize>>);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr std::initializer_list<int> GPR_ORDER{19, 20, 21, 22, 23, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8};
|
||||
constexpr std::initializer_list<int> FPR_ORDER{8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
|
||||
|
||||
using RegisterList = u64;
|
||||
|
||||
// Converts one register into its single-bit RegisterList mask.
// Vector registers occupy bits 32..63; other registers use their own index as
// the bit number, except that index -1 is mapped to bit 31. Index 31 itself is
// rejected ("ZR not allowed in reg list").
constexpr RegisterList ToRegList(oaknut::Reg reg) {
    const auto index = reg.index();

    if (reg.is_vector()) {
        return RegisterList{1} << (index + 32);
    }

    if (index == 31) {
        ASSERT_FALSE("ZR not allowed in reg list");
    }

    const auto bit = (index == -1) ? 31 : index;
    return RegisterList{1} << bit;
}
|
||||
|
||||
constexpr RegisterList ABI_CALLEE_SAVE = 0x0000ff00'7ff80000;
|
||||
constexpr RegisterList ABI_CALLER_SAVE = 0xffffffff'4000ffff;
|
||||
|
||||
void ABI_PushRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t stack_space);
|
||||
void ABI_PopRegisters(oaknut::CodeGenerator& code, RegisterList rl, size_t stack_space);
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
353
src/dynarmic/src/dynarmic/backend/arm64/address_space.cpp
Normal file
353
src/dynarmic/src/dynarmic/backend/arm64/address_space.cpp
Normal file
|
|
@ -0,0 +1,353 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include <mcl/bit_cast.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_address_space.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/devirtualize.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/stack_layout.h"
|
||||
#include "dynarmic/common/cast_util.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/common/llvm_disassemble.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
// Allocates the code cache, points the code generator at its start, and hooks
// the exception handler up so that faults inside the cache reach FastmemCallback.
AddressSpace::AddressSpace(size_t code_cache_size)
        : code_cache_size(code_cache_size)
        , mem(code_cache_size)
        , code(mem.ptr(), mem.ptr())
        , fastmem_manager(exception_handler) {
    ASSERT_MSG(code_cache_size <= 128 * 1024 * 1024, "code_cache_size > 128 MiB not currently supported");

    exception_handler.Register(mem, code_cache_size);

    auto on_fault = [this](u64 faulting_host_pc) {
        return FastmemCallback(faulting_host_pc);
    };
    exception_handler.SetFastmemCallback(on_fault);
}
|
||||
|
||||
AddressSpace::~AddressSpace() = default;
|
||||
|
||||
// Looks up the current entry point for `descriptor`; nullptr when no block is registered.
CodePtr AddressSpace::Get(IR::LocationDescriptor descriptor) {
    const auto found = block_entries.find(descriptor);
    if (found == block_entries.end()) {
        return nullptr;
    }
    return found->second;
}
|
||||
|
||||
// Maps a host code address back to the descriptor of the block whose entry
// point is the greatest one not above `host_pc`. Returns nullopt when every
// known entry point lies above `host_pc` (or none are known).
std::optional<IR::LocationDescriptor> AddressSpace::ReverseGetLocation(CodePtr host_pc) {
    auto candidate = reverse_block_entries.upper_bound(host_pc);
    if (candidate == reverse_block_entries.begin()) {
        return std::nullopt;
    }

    // upper_bound yields the first entry strictly above host_pc; step back one.
    --candidate;
    return candidate->second;
}
|
||||
|
||||
// Maps a host code address back to the greatest known block entry point that
// is not above `host_pc`. Returns nullptr when there is no such entry point.
CodePtr AddressSpace::ReverseGetEntryPoint(CodePtr host_pc) {
    auto candidate = reverse_block_entries.upper_bound(host_pc);
    if (candidate == reverse_block_entries.begin()) {
        return nullptr;
    }

    // upper_bound yields the first entry strictly above host_pc; step back one.
    --candidate;
    return candidate->first;
}
|
||||
|
||||
// Returns the entry point for `descriptor`, generating IR and emitting host
// code for it first if no block is registered yet.
CodePtr AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
    CodePtr entry = Get(descriptor);
    if (!entry) {
        entry = Emit(GenerateIR(descriptor)).entry_point;
    }
    return entry;
}
|
||||
|
||||
// Drops the given descriptors from block_entries and unlinks every block that
// jumps to them. Descriptors with no registered block are ignored.
void AddressSpace::InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& descriptors) {
    UnprotectCodeMemory();

    for (const auto& descriptor : descriptors) {
        if (const auto entry = block_entries.find(descriptor); entry != block_entries.end()) {
            // Unlink first: this can run from inside a fastmem callback, and the
            // block that is executing right now may hold links to itself.
            RelinkForDescriptor(descriptor, nullptr);

            block_entries.erase(entry);
        }
    }

    ProtectCodeMemory();
}
|
||||
|
||||
// Forgets every emitted block and rewinds the code generator to just past the prelude.
void AddressSpace::ClearCache() {
    // The four bookkeeping containers are independent of one another.
    block_references.clear();
    block_infos.clear();
    reverse_block_entries.clear();
    block_entries.clear();

    code.set_offset(prelude_info.end_of_prelude);
}
|
||||
|
||||
void AddressSpace::DumpDisassembly() const {
|
||||
for (u32* ptr = mem.ptr(); ptr < code.xptr<u32*>(); ptr++) {
|
||||
std::printf("%s", Common::DisassembleAArch64(*ptr, mcl::bit_cast<u64>(ptr)).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
size_t AddressSpace::GetRemainingSize() {
|
||||
return code_cache_size - static_cast<size_t>(code.offset());
|
||||
}
|
||||
|
||||
// Emits host code for `block`, records it in the lookup tables, links it to
// its targets and links existing blocks to it.
//
// If less than 1 MiB of cache remains, the whole cache is cleared first.
//
// Returns the EmittedBlockInfo describing the freshly emitted code.
EmittedBlockInfo AddressSpace::Emit(IR::Block block) {
    if (GetRemainingSize() < 1024 * 1024) {
        ClearCache();
    }

    UnprotectCodeMemory();

    // Read the descriptor before the block is moved into EmitArm64 so that no
    // bookkeeping below depends on the state of a moved-from object.
    const IR::LocationDescriptor location = block.Location();

    EmittedBlockInfo block_info = EmitArm64(code, std::move(block), GetEmitConfig(), fastmem_manager);

    // The insertions are performed unconditionally and only their results are
    // asserted. Keeping them inside ASSERT(...) would make the bookkeeping
    // depend on how the assertion macro treats its argument when assertions
    // are disabled (the build sets DYNARMIC_IGNORE_ASSERTS).
    [[maybe_unused]] const bool entry_inserted = block_entries.insert({location, block_info.entry_point}).second;
    ASSERT(entry_inserted);
    [[maybe_unused]] const bool reverse_inserted = reverse_block_entries.insert({block_info.entry_point, location}).second;
    ASSERT(reverse_inserted);
    [[maybe_unused]] const bool info_inserted = block_infos.insert({block_info.entry_point, block_info}).second;
    ASSERT(info_inserted);

    Link(block_info);
    RelinkForDescriptor(location, block_info.entry_point);

    mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
    ProtectCodeMemory();

    // NOTE(review): `block` has been moved from at this point, exactly as in the
    // previous implementation; EmitArm64 takes the block by value, so this hook
    // cannot be given the original object without an interface change.
    RegisterNewBasicBlock(block, block_info);

    return block_info;
}
|
||||
|
||||
// Patches every relocation recorded for a freshly emitted block.
//
// First pass: each entry in block_info.relocations is rewritten into a branch
// (B for the two "return" targets, BL for everything else) to the matching
// prelude routine.
// Second pass: each entry in block_info.block_relocations registers this block
// as a referrer of the target descriptor and patches the block-to-block links
// against whatever entry point that descriptor currently has (possibly none).
void AddressSpace::Link(EmittedBlockInfo& block_info) {
    using namespace oaknut;
    using namespace oaknut::util;

    for (const auto& [ptr_offset, target] : block_info.relocations) {
        // A throwaway generator positioned on the instruction to overwrite.
        CodeGenerator c{mem.ptr(), mem.ptr()};
        c.set_xptr(reinterpret_cast<u32*>(block_info.entry_point + ptr_offset));

        switch (target) {
        case LinkTarget::ReturnToDispatcher:
            c.B(prelude_info.return_to_dispatcher);
            break;
        case LinkTarget::ReturnFromRunCode:
            c.B(prelude_info.return_from_run_code);
            break;
        case LinkTarget::ReadMemory8:
            c.BL(prelude_info.read_memory_8);
            break;
        case LinkTarget::ReadMemory16:
            c.BL(prelude_info.read_memory_16);
            break;
        case LinkTarget::ReadMemory32:
            c.BL(prelude_info.read_memory_32);
            break;
        case LinkTarget::ReadMemory64:
            c.BL(prelude_info.read_memory_64);
            break;
        case LinkTarget::ReadMemory128:
            c.BL(prelude_info.read_memory_128);
            break;
        case LinkTarget::WrappedReadMemory8:
            c.BL(prelude_info.wrapped_read_memory_8);
            break;
        case LinkTarget::WrappedReadMemory16:
            c.BL(prelude_info.wrapped_read_memory_16);
            break;
        case LinkTarget::WrappedReadMemory32:
            c.BL(prelude_info.wrapped_read_memory_32);
            break;
        case LinkTarget::WrappedReadMemory64:
            c.BL(prelude_info.wrapped_read_memory_64);
            break;
        case LinkTarget::WrappedReadMemory128:
            c.BL(prelude_info.wrapped_read_memory_128);
            break;
        case LinkTarget::ExclusiveReadMemory8:
            c.BL(prelude_info.exclusive_read_memory_8);
            break;
        case LinkTarget::ExclusiveReadMemory16:
            c.BL(prelude_info.exclusive_read_memory_16);
            break;
        case LinkTarget::ExclusiveReadMemory32:
            c.BL(prelude_info.exclusive_read_memory_32);
            break;
        case LinkTarget::ExclusiveReadMemory64:
            c.BL(prelude_info.exclusive_read_memory_64);
            break;
        case LinkTarget::ExclusiveReadMemory128:
            c.BL(prelude_info.exclusive_read_memory_128);
            break;
        case LinkTarget::WriteMemory8:
            c.BL(prelude_info.write_memory_8);
            break;
        case LinkTarget::WriteMemory16:
            c.BL(prelude_info.write_memory_16);
            break;
        case LinkTarget::WriteMemory32:
            c.BL(prelude_info.write_memory_32);
            break;
        case LinkTarget::WriteMemory64:
            c.BL(prelude_info.write_memory_64);
            break;
        case LinkTarget::WriteMemory128:
            c.BL(prelude_info.write_memory_128);
            break;
        case LinkTarget::WrappedWriteMemory8:
            c.BL(prelude_info.wrapped_write_memory_8);
            break;
        case LinkTarget::WrappedWriteMemory16:
            c.BL(prelude_info.wrapped_write_memory_16);
            break;
        case LinkTarget::WrappedWriteMemory32:
            c.BL(prelude_info.wrapped_write_memory_32);
            break;
        case LinkTarget::WrappedWriteMemory64:
            c.BL(prelude_info.wrapped_write_memory_64);
            break;
        case LinkTarget::WrappedWriteMemory128:
            c.BL(prelude_info.wrapped_write_memory_128);
            break;
        case LinkTarget::ExclusiveWriteMemory8:
            c.BL(prelude_info.exclusive_write_memory_8);
            break;
        case LinkTarget::ExclusiveWriteMemory16:
            c.BL(prelude_info.exclusive_write_memory_16);
            break;
        case LinkTarget::ExclusiveWriteMemory32:
            c.BL(prelude_info.exclusive_write_memory_32);
            break;
        case LinkTarget::ExclusiveWriteMemory64:
            c.BL(prelude_info.exclusive_write_memory_64);
            break;
        case LinkTarget::ExclusiveWriteMemory128:
            c.BL(prelude_info.exclusive_write_memory_128);
            break;
        case LinkTarget::CallSVC:
            c.BL(prelude_info.call_svc);
            break;
        case LinkTarget::ExceptionRaised:
            c.BL(prelude_info.exception_raised);
            break;
        case LinkTarget::InstructionSynchronizationBarrierRaised:
            c.BL(prelude_info.isb_raised);
            break;
        case LinkTarget::InstructionCacheOperationRaised:
            c.BL(prelude_info.ic_raised);
            break;
        case LinkTarget::DataCacheOperationRaised:
            c.BL(prelude_info.dc_raised);
            break;
        case LinkTarget::GetCNTPCT:
            c.BL(prelude_info.get_cntpct);
            break;
        case LinkTarget::AddTicks:
            c.BL(prelude_info.add_ticks);
            break;
        case LinkTarget::GetTicksRemaining:
            c.BL(prelude_info.get_ticks_remaining);
            break;
        default:
            ASSERT_FALSE("Invalid relocation target");
        }
    }

    // Bind by const reference: `list` is a whole vector of relocations and
    // would otherwise be copied on every iteration.
    for (const auto& [target_descriptor, list] : block_info.block_relocations) {
        block_references[target_descriptor].insert(block_info.entry_point);
        LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
    }
}
|
||||
|
||||
void AddressSpace::LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
|
||||
using namespace oaknut;
|
||||
using namespace oaknut::util;
|
||||
|
||||
for (auto [ptr_offset, type] : block_relocations_list) {
|
||||
CodeGenerator c{mem.ptr(), mem.ptr()};
|
||||
c.set_xptr(reinterpret_cast<u32*>(entry_point + ptr_offset));
|
||||
|
||||
switch (type) {
|
||||
case BlockRelocationType::Branch:
|
||||
if (target_ptr) {
|
||||
c.B((void*)target_ptr);
|
||||
} else {
|
||||
c.NOP();
|
||||
}
|
||||
break;
|
||||
case BlockRelocationType::MoveToScratch1:
|
||||
if (target_ptr) {
|
||||
c.ADRL(Xscratch1, (void*)target_ptr);
|
||||
} else {
|
||||
c.ADRL(Xscratch1, prelude_info.return_to_dispatcher);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid BlockRelocationType");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-patches every block that links to `target_descriptor` so that its links
// refer to `target_ptr` (nullptr unlinks them), then invalidates the affected
// code range of each such block.
//
// Does nothing when no block has registered a reference to the descriptor.
void AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor, CodePtr target_ptr) {
    // Use find() rather than operator[]: this is a pure lookup and must not
    // default-insert an empty set for descriptors that nothing refers to.
    const auto referrers = block_references.find(target_descriptor);
    if (referrers == block_references.end()) {
        return;
    }

    for (const auto code_ptr : referrers->second) {
        const auto block_iter = block_infos.find(code_ptr);
        if (block_iter == block_infos.end()) {
            // Referrer is no longer known; nothing to patch.
            continue;
        }

        const EmittedBlockInfo& block_info = block_iter->second;

        if (const auto relocation_iter = block_info.block_relocations.find(target_descriptor); relocation_iter != block_info.block_relocations.end()) {
            LinkBlockLinks(block_info.entry_point, target_ptr, relocation_iter->second);
        }

        mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
    }
}
|
||||
|
||||
// Invoked with the host PC of a fault inside the code cache. If the PC matches
// a recorded fastmem patch site, returns that site's FakeCall; when the site is
// flagged for recompilation, its marker is first recorded as "do not fastmem"
// and the owning block is invalidated. Any other PC is reported and asserts.
FakeCall AddressSpace::FastmemCallback(u64 host_pc) {
    const auto host_ptr = mcl::bit_cast<CodePtr>(host_pc);

    if (const auto entry_point = ReverseGetEntryPoint(host_ptr)) {
        if (const auto info_iter = block_infos.find(entry_point); info_iter != block_infos.end()) {
            const auto& patches = info_iter->second.fastmem_patch_info;

            if (const auto patch_iter = patches.find(host_ptr - entry_point); patch_iter != patches.end()) {
                // Copy out what is needed before any invalidation happens.
                const auto fc = patch_iter->second.fc;

                if (patch_iter->second.recompile) {
                    const auto marker = patch_iter->second.marker;
                    fastmem_manager.MarkDoNotFastmem(marker);
                    InvalidateBasicBlocks({std::get<0>(marker)});
                }

                return fc;
            }
        }
    }

    fmt::print("dynarmic: Segfault happened within JITted code at host_pc = {:016x}\n", host_pc);
    fmt::print("Segfault wasn't at a fastmem patch location!\n");
    ASSERT_FALSE("segfault");
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
140
src/dynarmic/src/dynarmic/backend/arm64/address_space.h
Normal file
140
src/dynarmic/src/dynarmic/backend/arm64/address_space.h
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <optional>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <oaknut/code_block.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/fastmem.h"
|
||||
#include "dynarmic/interface/halt_reason.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class AddressSpace {
|
||||
public:
|
||||
explicit AddressSpace(size_t code_cache_size);
|
||||
virtual ~AddressSpace();
|
||||
|
||||
virtual IR::Block GenerateIR(IR::LocationDescriptor) const = 0;
|
||||
|
||||
CodePtr Get(IR::LocationDescriptor descriptor);
|
||||
|
||||
// Returns "most likely" LocationDescriptor assocated with the emitted code at that location
|
||||
std::optional<IR::LocationDescriptor> ReverseGetLocation(CodePtr host_pc);
|
||||
|
||||
// Returns "most likely" entry_point associated with the emitted code at that location
|
||||
CodePtr ReverseGetEntryPoint(CodePtr host_pc);
|
||||
|
||||
CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
|
||||
|
||||
void InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& descriptors);
|
||||
|
||||
void ClearCache();
|
||||
|
||||
void DumpDisassembly() const;
|
||||
|
||||
protected:
|
||||
virtual EmitConfig GetEmitConfig() = 0;
|
||||
virtual void RegisterNewBasicBlock(const IR::Block& block, const EmittedBlockInfo& block_info) = 0;
|
||||
|
||||
void ProtectCodeMemory() {
|
||||
#if defined(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT) || defined(__APPLE__) || defined(__OpenBSD__)
|
||||
mem.protect();
|
||||
#endif
|
||||
}
|
||||
|
||||
void UnprotectCodeMemory() {
|
||||
#if defined(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT) || defined(__APPLE__) || defined(__OpenBSD__)
|
||||
mem.unprotect();
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t GetRemainingSize();
|
||||
EmittedBlockInfo Emit(IR::Block ir_block);
|
||||
void Link(EmittedBlockInfo& block);
|
||||
void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list);
|
||||
void RelinkForDescriptor(IR::LocationDescriptor target_descriptor, CodePtr target_ptr);
|
||||
|
||||
FakeCall FastmemCallback(u64 host_pc);
|
||||
|
||||
const size_t code_cache_size;
|
||||
oaknut::CodeBlock mem;
|
||||
oaknut::CodeGenerator code;
|
||||
|
||||
// A IR::LocationDescriptor will have one current CodePtr.
|
||||
// However, there can be multiple other CodePtrs which are older, previously invalidated blocks.
|
||||
std::map<CodePtr, IR::LocationDescriptor> reverse_block_entries;
|
||||
ankerl::unordered_dense::map<IR::LocationDescriptor, CodePtr> block_entries;
|
||||
ankerl::unordered_dense::map<CodePtr, EmittedBlockInfo> block_infos;
|
||||
ankerl::unordered_dense::map<IR::LocationDescriptor, ankerl::unordered_dense::set<CodePtr>> block_references;
|
||||
|
||||
ExceptionHandler exception_handler;
|
||||
FastmemManager fastmem_manager;
|
||||
|
||||
struct PreludeInfo {
|
||||
std::ptrdiff_t end_of_prelude;
|
||||
|
||||
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, void* jit_state, volatile u32* halt_reason);
|
||||
RunCodeFuncType run_code;
|
||||
RunCodeFuncType step_code;
|
||||
void* return_to_dispatcher;
|
||||
void* return_from_run_code;
|
||||
|
||||
void* read_memory_8;
|
||||
void* read_memory_16;
|
||||
void* read_memory_32;
|
||||
void* read_memory_64;
|
||||
void* read_memory_128;
|
||||
void* wrapped_read_memory_8;
|
||||
void* wrapped_read_memory_16;
|
||||
void* wrapped_read_memory_32;
|
||||
void* wrapped_read_memory_64;
|
||||
void* wrapped_read_memory_128;
|
||||
void* exclusive_read_memory_8;
|
||||
void* exclusive_read_memory_16;
|
||||
void* exclusive_read_memory_32;
|
||||
void* exclusive_read_memory_64;
|
||||
void* exclusive_read_memory_128;
|
||||
void* write_memory_8;
|
||||
void* write_memory_16;
|
||||
void* write_memory_32;
|
||||
void* write_memory_64;
|
||||
void* write_memory_128;
|
||||
void* wrapped_write_memory_8;
|
||||
void* wrapped_write_memory_16;
|
||||
void* wrapped_write_memory_32;
|
||||
void* wrapped_write_memory_64;
|
||||
void* wrapped_write_memory_128;
|
||||
void* exclusive_write_memory_8;
|
||||
void* exclusive_write_memory_16;
|
||||
void* exclusive_write_memory_32;
|
||||
void* exclusive_write_memory_64;
|
||||
void* exclusive_write_memory_128;
|
||||
|
||||
void* call_svc;
|
||||
void* exception_raised;
|
||||
void* dc_raised;
|
||||
void* ic_raised;
|
||||
void* isb_raised;
|
||||
|
||||
void* get_cntpct;
|
||||
void* add_ticks;
|
||||
void* get_ticks_remaining;
|
||||
} prelude_info;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
60
src/dynarmic/src/dynarmic/backend/arm64/devirtualize.h
Normal file
60
src/dynarmic/src/dynarmic/backend/arm64/devirtualize.h
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <mcl/type_traits/function_info.hpp>
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct DevirtualizedCall {
|
||||
u64 fn_ptr;
|
||||
u64 this_ptr;
|
||||
};
|
||||
|
||||
// https://rants.vastheman.com/2021/09/21/msvc/
|
||||
template<auto mfp>
|
||||
DevirtualizedCall DevirtualizeWindows(mcl::class_type<decltype(mfp)>* this_) {
|
||||
static_assert(sizeof(mfp) == 8);
|
||||
return DevirtualizedCall{mcl::bit_cast<u64>(mfp), reinterpret_cast<u64>(this_)};
|
||||
}
|
||||
|
||||
// https://github.com/ARM-software/abi-aa/blob/main/cppabi64/cppabi64.rst#representation-of-pointer-to-member-function
|
||||
template<auto mfp>
|
||||
DevirtualizedCall DevirtualizeDefault(mcl::class_type<decltype(mfp)>* this_) {
|
||||
struct MemberFunctionPointer {
|
||||
// Address of non-virtual function or index into vtable.
|
||||
u64 ptr;
|
||||
// LSB is discriminator for if function is virtual. Other bits are this adjustment.
|
||||
u64 adj;
|
||||
} mfp_struct = mcl::bit_cast<MemberFunctionPointer>(mfp);
|
||||
|
||||
static_assert(sizeof(MemberFunctionPointer) == 16);
|
||||
static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));
|
||||
|
||||
u64 fn_ptr = mfp_struct.ptr;
|
||||
u64 this_ptr = mcl::bit_cast<u64>(this_) + (mfp_struct.adj >> 1);
|
||||
if (mfp_struct.adj & 1) {
|
||||
u64 vtable = mcl::bit_cast_pointee<u64>(this_ptr);
|
||||
fn_ptr = mcl::bit_cast_pointee<u64>(vtable + fn_ptr);
|
||||
}
|
||||
return DevirtualizedCall{fn_ptr, this_ptr};
|
||||
}
|
||||
|
||||
template<auto mfp>
|
||||
DevirtualizedCall Devirtualize(mcl::class_type<decltype(mfp)>* this_) {
|
||||
#if defined(_WIN32) && defined(_MSC_VER)
|
||||
return DevirtualizeWindows<mfp>(this_);
|
||||
#else
|
||||
return DevirtualizeDefault<mfp>(this_);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
290
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64.cpp
Normal file
290
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64.cpp
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/backend/arm64/verbose_debugging_output.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Void>(oaknut::CodeGenerator&, EmitContext&, IR::Inst*) {}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Identity>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Breakpoint>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.BRK(0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CallHostFunction>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
ctx.reg_alloc.PrepareForCall(args[1], args[2], args[3]);
|
||||
code.MOV(Xscratch0, args[0].GetImmediateU64());
|
||||
code.BLR(Xscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PushRSB>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
if (!ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer)) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(args[0].IsImmediate());
|
||||
const IR::LocationDescriptor target{args[0].GetImmediateU64()};
|
||||
|
||||
code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
|
||||
code.ADD(Wscratch2, Wscratch2, sizeof(RSBEntry));
|
||||
code.AND(Wscratch2, Wscratch2, RSBIndexMask);
|
||||
code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
|
||||
code.ADD(Xscratch2, SP, Xscratch2);
|
||||
|
||||
code.MOV(Xscratch0, target.Value());
|
||||
EmitBlockLinkRelocation(code, ctx, target, BlockRelocationType::MoveToScratch1);
|
||||
code.STP(Xscratch0, Xscratch1, Xscratch2, offsetof(StackLayout, rsb));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetCarryFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetOverflowFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetGEFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetNZCVFromOp>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (ctx.reg_alloc.WasValueDefined(inst)) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (args[0].GetType()) {
|
||||
case IR::Type::U32: {
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto flags = ctx.reg_alloc.WriteFlags(inst);
|
||||
RegAlloc::Realize(Wvalue, flags);
|
||||
|
||||
code.TST(*Wvalue, Wvalue);
|
||||
break;
|
||||
}
|
||||
case IR::Type::U64: {
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto flags = ctx.reg_alloc.WriteFlags(inst);
|
||||
RegAlloc::Realize(Xvalue, flags);
|
||||
|
||||
code.TST(*Xvalue, Xvalue);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT_FALSE("Invalid type for GetNZCVFromOp");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetNZFromOp>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (ctx.reg_alloc.WasValueDefined(inst)) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (args[0].GetType()) {
|
||||
case IR::Type::U32: {
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto flags = ctx.reg_alloc.WriteFlags(inst);
|
||||
RegAlloc::Realize(Wvalue, flags);
|
||||
|
||||
code.TST(*Wvalue, *Wvalue);
|
||||
break;
|
||||
}
|
||||
case IR::Type::U64: {
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto flags = ctx.reg_alloc.WriteFlags(inst);
|
||||
RegAlloc::Realize(Xvalue, flags);
|
||||
|
||||
code.TST(*Xvalue, *Xvalue);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT_FALSE("Invalid type for GetNZFromOp");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetUpperFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetLowerFromOp>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(ctx.reg_alloc.WasValueDefined(inst));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetCFlagFromNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Wc = ctx.reg_alloc.WriteW(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wc, Wnzcv);
|
||||
|
||||
code.AND(Wc, Wnzcv, 1 << 29);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::NZCVFromPackedFlags>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
|
||||
}
|
||||
|
||||
// Emits code that subtracts `cycles_to_add` from the Xticks register.
// Nothing is emitted when cycle counting is disabled or the count is zero.
static void EmitAddCycles(oaknut::CodeGenerator& code, EmitContext& ctx, size_t cycles_to_add) {
    if (!ctx.conf.enable_cycle_counting || cycles_to_add == 0) {
        return;
    }

    if (!oaknut::AddSubImm::is_valid(cycles_to_add)) {
        // Not encodable as an add/sub immediate: go through a scratch register.
        code.MOV(Xscratch1, cycles_to_add);
        code.SUB(Xticks, Xticks, Xscratch1);
        return;
    }

    code.SUB(Xticks, Xticks, cycles_to_add);
}
|
||||
|
||||
// Emits host code for a single IR block.
//
// Layout of the emitted code, in order:
//   1. optional condition check + condition-failed terminal,
//   2. one EmitIR<> expansion per IR instruction,
//   3. cycle accounting and the block terminal, followed by BRK,
//   4. all deferred emits queued on the context, followed by BRK.
//
// Returns the entry point, byte size and the relocation records gathered
// while emitting.
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& conf, FastmemManager& fastmem_manager) {
    if (conf.very_verbose_debugging_output) {
        std::puts(IR::DumpBlock(block).c_str());
    }

    EmittedBlockInfo ebi;

    FpsrManager fpsr_manager{code, conf.state_fpsr_offset};
    RegAlloc reg_alloc{code, fpsr_manager, GPR_ORDER, FPR_ORDER};
    EmitContext ctx{block, reg_alloc, conf, ebi, fpsr_manager, fastmem_manager, {}};

    ebi.entry_point = code.xptr<CodePtr>();

    if (ctx.block.GetCondition() != IR::Cond::AL) {
        // Conditional block: fall into the condition-failed terminal unless
        // the frontend's condition check branches to `cond_passed`.
        ASSERT(ctx.block.HasConditionFailedLocation());

        oaknut::Label cond_passed = conf.emit_cond(code, ctx, ctx.block.GetCondition());
        EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
        conf.emit_condition_failed_terminal(code, ctx);

        code.l(cond_passed);
    } else {
        ASSERT(!ctx.block.HasConditionFailedLocation());
    }

    for (auto& current : block) {
        IR::Inst* inst = &current;

        // Dispatch to the EmitIR specialisation matching the opcode; the
        // case labels are generated from opcodes.inc.
        switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...)                    \
    case IR::Opcode::name:                         \
        EmitIR<IR::Opcode::name>(code, ctx, inst); \
        break;
#define A32OPC(name, type, ...)                         \
    case IR::Opcode::A32##name:                         \
        EmitIR<IR::Opcode::A32##name>(code, ctx, inst); \
        break;
#define A64OPC(name, type, ...)                         \
    case IR::Opcode::A64##name:                         \
        EmitIR<IR::Opcode::A64##name>(code, ctx, inst); \
        break;
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
        default:
            ASSERT_FALSE("Invalid opcode: {}", inst->GetOpcode());
            break;
        }

        reg_alloc.UpdateAllUses();
        reg_alloc.AssertAllUnlocked();

        if (conf.very_verbose_debugging_output) {
            EmitVerboseDebuggingOutput(code, ctx);
        }
    }

    fpsr_manager.Spill();

    reg_alloc.AssertNoMoreUses();

    EmitAddCycles(code, ctx, block.CycleCount());
    conf.emit_terminal(code, ctx);
    code.BRK(0);

    // Out-of-line code queued by the emitters is placed after the terminal.
    for (const auto& emit_deferred : ctx.deferred_emits) {
        emit_deferred();
    }
    code.BRK(0);

    ebi.size = code.xptr<CodePtr>() - ebi.entry_point;
    return ebi;
}
|
||||
|
||||
// Records a relocation for `link_target` at the current code position
// (as a byte offset from the block's entry point) and emits a NOP there.
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target) {
    const std::ptrdiff_t offset = code.xptr<CodePtr>() - ctx.ebi.entry_point;
    ctx.ebi.relocations.push_back(Relocation{offset, link_target});

    code.NOP();
}
|
||||
|
||||
// Records a block-link relocation against `descriptor` at the current code
// position and emits placeholder instructions for it:
//   Branch         -> NOP
//   MoveToScratch1 -> BRK, NOP
void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, const IR::LocationDescriptor& descriptor, BlockRelocationType type) {
    const std::ptrdiff_t offset = code.xptr<CodePtr>() - ctx.ebi.entry_point;
    auto& pending = ctx.ebi.block_relocations[descriptor];
    pending.push_back(BlockRelocation{offset, type});

    switch (type) {
    case BlockRelocationType::Branch: {
        code.NOP();
        break;
    }
    case BlockRelocationType::MoveToScratch1: {
        code.BRK(0);
        code.NOP();
        break;
    }
    default:
        UNREACHABLE();
    }
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
184
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64.h
Normal file
184
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64.h
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/backend/arm64/fastmem.h"
|
||||
#include "dynarmic/interface/A32/coprocessor.h"
|
||||
#include "dynarmic/interface/optimization_flags.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace oaknut {
|
||||
struct CodeGenerator;
|
||||
struct Label;
|
||||
} // namespace oaknut
|
||||
|
||||
namespace Dynarmic::FP {
|
||||
class FPCR;
|
||||
} // namespace Dynarmic::FP
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
class Inst;
|
||||
enum class Cond;
|
||||
enum class Opcode;
|
||||
} // namespace Dynarmic::IR
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct EmitContext;
|
||||
|
||||
using CodePtr = std::byte*;
|
||||
|
||||
// Identifies the target a Relocation refers to.
// The enumerator order is significant (it fixes the underlying values) and
// must not be changed.
enum class LinkTarget {
    // Return paths
    ReturnToDispatcher,
    ReturnFromRunCode,

    // Memory reads: plain, wrapped, exclusive
    ReadMemory8, ReadMemory16, ReadMemory32, ReadMemory64, ReadMemory128,
    WrappedReadMemory8, WrappedReadMemory16, WrappedReadMemory32, WrappedReadMemory64, WrappedReadMemory128,
    ExclusiveReadMemory8, ExclusiveReadMemory16, ExclusiveReadMemory32, ExclusiveReadMemory64, ExclusiveReadMemory128,

    // Memory writes: plain, wrapped, exclusive
    WriteMemory8, WriteMemory16, WriteMemory32, WriteMemory64, WriteMemory128,
    WrappedWriteMemory8, WrappedWriteMemory16, WrappedWriteMemory32, WrappedWriteMemory64, WrappedWriteMemory128,
    ExclusiveWriteMemory8, ExclusiveWriteMemory16, ExclusiveWriteMemory32, ExclusiveWriteMemory64, ExclusiveWriteMemory128,

    // Supervisor calls, exceptions and barrier / cache operations
    CallSVC,
    ExceptionRaised,
    InstructionSynchronizationBarrierRaised,
    InstructionCacheOperationRaised,
    DataCacheOperationRaised,

    // Timing
    GetCNTPCT,
    AddTicks,
    GetTicksRemaining,
};
|
||||
|
||||
struct Relocation {
|
||||
std::ptrdiff_t code_offset;
|
||||
LinkTarget target;
|
||||
};
|
||||
|
||||
// Kind of placeholder emitted for a block-link relocation.
enum class BlockRelocationType {
    Branch,          // placeholder is a single NOP
    MoveToScratch1,  // placeholder is BRK followed by NOP
};
|
||||
|
||||
struct BlockRelocation {
|
||||
std::ptrdiff_t code_offset;
|
||||
BlockRelocationType type;
|
||||
};
|
||||
|
||||
struct EmittedBlockInfo {
|
||||
CodePtr entry_point;
|
||||
size_t size;
|
||||
std::vector<Relocation> relocations;
|
||||
ankerl::unordered_dense::map<IR::LocationDescriptor, std::vector<BlockRelocation>> block_relocations;
|
||||
ankerl::unordered_dense::map<std::ptrdiff_t, FastmemPatchInfo> fastmem_patch_info;
|
||||
};
|
||||
|
||||
struct EmitConfig {
|
||||
OptimizationFlag optimizations;
|
||||
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
|
||||
|
||||
bool hook_isb;
|
||||
|
||||
// System registers
|
||||
u64 cntfreq_el0;
|
||||
u32 ctr_el0;
|
||||
u32 dczid_el0;
|
||||
const u64* tpidrro_el0;
|
||||
u64* tpidr_el0;
|
||||
|
||||
// Memory
|
||||
bool check_halt_on_memory_access;
|
||||
|
||||
// Page table
|
||||
u64 page_table_pointer;
|
||||
size_t page_table_address_space_bits;
|
||||
int page_table_pointer_mask_bits;
|
||||
bool silently_mirror_page_table;
|
||||
bool absolute_offset_page_table;
|
||||
u8 detect_misaligned_access_via_page_table;
|
||||
bool only_detect_misalignment_via_page_table_on_page_boundary;
|
||||
|
||||
// Fastmem
|
||||
std::optional<u64> fastmem_pointer;
|
||||
bool recompile_on_fastmem_failure;
|
||||
size_t fastmem_address_space_bits;
|
||||
bool silently_mirror_fastmem;
|
||||
|
||||
// Timing
|
||||
bool wall_clock_cntpct;
|
||||
bool enable_cycle_counting;
|
||||
|
||||
// Endianness
|
||||
bool always_little_endian;
|
||||
|
||||
// Frontend specific callbacks
|
||||
FP::FPCR (*descriptor_to_fpcr)(const IR::LocationDescriptor& descriptor);
|
||||
oaknut::Label (*emit_cond)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
|
||||
void (*emit_condition_failed_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void (*emit_terminal)(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void (*emit_check_memory_abort)(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end);
|
||||
|
||||
// State offsets
|
||||
size_t state_nzcv_offset;
|
||||
size_t state_fpsr_offset;
|
||||
size_t state_exclusive_state_offset;
|
||||
|
||||
// A32 specific
|
||||
std::array<std::shared_ptr<A32::Coprocessor>, 16> coprocessors{};
|
||||
|
||||
// Debugging
|
||||
bool very_verbose_debugging_output;
|
||||
};
|
||||
|
||||
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& emit_conf, FastmemManager& fastmem_manager);
|
||||
|
||||
template<IR::Opcode op>
|
||||
void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target);
|
||||
void EmitBlockLinkRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, const IR::LocationDescriptor& descriptor, BlockRelocationType type);
|
||||
oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
|
||||
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
|
||||
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void EmitA32CheckMemoryAbort(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end);
|
||||
void EmitA64CheckMemoryAbort(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end);
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
707
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_a32.cpp
Normal file
707
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_a32.cpp
Normal file
|
|
@ -0,0 +1,707 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/bit/bit_field.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/frontend/A32/a32_types.h"
|
||||
#include "dynarmic/interface/halt_reason.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Emits a conditional branch on the guest's stored NZCV flags.
// The stored flags are loaded into the host NZCV register and a branch on
// `cond` is emitted to the returned (still unbound) label.
oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) {
    oaknut::Label taken;
    const auto host_cond = static_cast<oaknut::Cond>(cond);

    // TODO: Flags in host flags
    code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
    code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
    code.B(host_cond, taken);

    return taken;
}
|
||||
|
||||
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
|
||||
|
||||
// Interpret terminals are not supported by this backend; reaching this
// overload is a failed assertion.
void EmitA32Terminal(oaknut::CodeGenerator&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) {
    ASSERT_FALSE("Interpret should never be emitted.");
}
|
||||
|
||||
// ReturnToDispatch: emit a relocation to the dispatcher return target.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
    constexpr LinkTarget target = LinkTarget::ReturnToDispatcher;
    EmitRelocation(code, ctx, target);
}
|
||||
|
||||
// Emits a store to A32JitState::upper_location_descriptor when moving from
// `old_location` to `new_location` changes its value.
//
// The value is the upper 32 bits of the descriptor's unique hash with
// single-stepping cleared. When `always_little_endian` is set, bit 1 of the
// new value is masked off before the comparison and the store.
static void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
    const auto upper_half = [](const IR::LocationDescriptor& desc) -> u32 {
        const auto hash = A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash();
        return static_cast<u32>(hash >> 32);
    };

    const u32 keep_mask = ctx.conf.always_little_endian ? ~u32(0x2) : ~u32(0);
    const u32 previous = upper_half(old_location);
    const u32 wanted = upper_half(new_location) & keep_mask;

    if (previous == wanted) {
        return;  // nothing to update
    }

    code.MOV(Wscratch0, wanted);
    code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
}
|
||||
|
||||
// LinkBlock: jump to `terminal.next` if allowed, otherwise return to the
// dispatcher with the guest PC (register 15) set to the target.
//
// With block linking enabled and not single-stepping, a block-link
// relocation is emitted behind a guard: either "ticks remaining > 0" when
// cycle counting is on, or "halt word is zero" otherwise.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location);

    oaknut::Label slow_path;

    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        if (ctx.conf.enable_cycle_counting) {
            code.CMP(Xticks, 0);
            code.B(LE, slow_path);
        } else {
            code.LDAR(Wscratch0, Xhalt);
            code.CBNZ(Wscratch0, slow_path);
        }
        EmitBlockLinkRelocation(code, ctx, terminal.next, BlockRelocationType::Branch);
    }

    code.l(slow_path);
    code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC());
    code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// LinkBlockFast: like LinkBlock but without a guard in front of the
// block-link relocation. The dispatcher-return sequence that stores the
// guest PC (register 15) is still emitted after it.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location);

    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        EmitBlockLinkRelocation(code, ctx, terminal.next, BlockRelocationType::Branch);
    }

    code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC());
    code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// PopRSBHint: with the return-stack-buffer optimisation enabled (and not
// single-stepping), read an entry from the stack-resident RSB, step the RSB
// pointer back by one entry, and branch to the entry's second word when its
// first word equals the 64-bit value loaded from regs[15] onwards.
// On mismatch, or when the optimisation is off, return to the dispatcher.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
    const bool use_rsb = ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step;

    if (use_rsb) {
        oaknut::Label miss;

        // Current RSB index -> entry address in X2; store the decremented index back.
        code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
        code.AND(Wscratch2, Wscratch2, RSBIndexMask);
        code.ADD(X2, SP, Xscratch2);
        code.SUB(Wscratch2, Wscratch2, sizeof(RSBEntry));
        code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));

        // Xscratch0 = first word of the entry, Xscratch1 = second word.
        code.LDP(Xscratch0, Xscratch1, X2, offsetof(StackLayout, rsb));

        // regs[15] is immediately followed by upper_location_descriptor, so a
        // single 64-bit load picks up both.
        static_assert(offsetof(A32JitState, regs) + 16 * sizeof(u32) == offsetof(A32JitState, upper_location_descriptor));
        code.LDUR(X0, Xstate, offsetof(A32JitState, regs) + 15 * sizeof(u32));

        code.CMP(X0, Xscratch0);
        code.B(NE, miss);
        code.BR(Xscratch1);

        code.l(miss);
    }

    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// FastDispatchHint: currently handled as a plain return to the dispatcher.
// TODO: Implement FastDispatchHint optimization
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
    constexpr LinkTarget target = LinkTarget::ReturnToDispatcher;
    EmitRelocation(code, ctx, target);
}
|
||||
|
||||
// If: the else-terminal is emitted first as the fall-through path; the
// condition check branches over it to the then-terminal.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label cond_holds = EmitA32Cond(code, ctx, terminal.if_);

    // Condition false.
    EmitA32Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    // Condition true.
    code.l(cond_holds);
    EmitA32Terminal(code, ctx, terminal.then_, initial_location, is_single_step);
}
|
||||
|
||||
// CheckBit: reads the check_bit byte from the stack layout and selects the
// then-terminal when it is non-zero, the else-terminal when it is zero.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label bit_clear;

    code.LDRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
    code.CBZ(Wscratch0, bit_clear);

    // Bit set.
    EmitA32Terminal(code, ctx, terminal.then_, initial_location, is_single_step);

    // Bit clear.
    code.l(bit_clear);
    EmitA32Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
}
|
||||
|
||||
// CheckHalt: when the halt word is non-zero, return to the dispatcher;
// otherwise continue with the nested else-terminal.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label halt_requested;

    code.LDAR(Wscratch0, Xhalt);
    code.CBNZ(Wscratch0, halt_requested);

    // No halt pending.
    EmitA32Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    // Halt pending.
    code.l(halt_requested);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// Variant dispatcher: forwards to the EmitA32Terminal overload that matches
// the alternative currently held by `terminal`.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    const auto dispatch = [&](const auto& concrete) {
        EmitA32Terminal(code, ctx, concrete, initial_location, is_single_step);
    };
    boost::apply_visitor(dispatch, terminal);
}
|
||||
|
||||
// Entry point used by the generic emitter: emits the current block's
// terminal. The single-stepping flag is split off the block location and
// passed separately; the location itself is passed with the flag cleared.
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A32::LocationDescriptor block_location{ctx.block.Location()};

    EmitA32Terminal(code,
                    ctx,
                    ctx.block.GetTerminal(),
                    block_location.SetSingleStepping(false),
                    block_location.SingleStepping());
}
|
||||
|
||||
// Emits the terminal taken when the block's condition fails: a LinkBlock to
// the block's condition-failed location.
void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A32::LocationDescriptor block_location{ctx.block.Location()};

    EmitA32Terminal(code,
                    ctx,
                    IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()},
                    block_location.SetSingleStepping(false),
                    block_location.SingleStepping());
}
|
||||
|
||||
// Emits a check for a pending memory abort after a memory access.
//
// Does nothing unless `check_halt_on_memory_access` is configured. Otherwise
// the halt word is loaded and its HaltReason::MemoryAbort bit is tested:
//   - bit clear: branch to `end`;
//   - bit set:   update the upper location descriptor and guest PC
//                (register 15) to the location taken from the instruction's
//                first argument, then leave via ReturnFromRunCode.
//
// The halt word is loaded as a 32-bit value, matching the other reads of
// Xhalt in this file. The tested mask is a u32, so the result is the same
// as with a wider load, but nothing past the halt word is read and no
// 8-byte alignment is required of it.
void EmitA32CheckMemoryAbort(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end) {
    if (!ctx.conf.check_halt_on_memory_access) {
        return;
    }

    const A32::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

    code.LDAR(Wscratch0, Xhalt);
    code.TST(Wscratch0, static_cast<u32>(HaltReason::MemoryAbort));
    code.B(EQ, end);

    // Abort pending: record where we are and leave the run loop.
    EmitSetUpperLocationDescriptor(code, ctx, current_location, ctx.block.Location());
    code.MOV(Wscratch0, current_location.PC());
    code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
    EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCheckBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[0].GetImmediateU1()) {
|
||||
code.MOV(Wscratch0, 1);
|
||||
code.STRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
|
||||
} else {
|
||||
code.STRB(WZR, SP, offsetof(StackLayout, check_bit));
|
||||
}
|
||||
} else {
|
||||
auto Wbit = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wbit);
|
||||
code.STRB(Wbit, SP, offsetof(StackLayout, check_bit));
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetRegister>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
|
||||
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wresult);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.LDR(Wresult, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetExtendedRegister32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
ASSERT(A32::IsSingleExtReg(reg));
|
||||
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::S0);
|
||||
|
||||
auto Sresult = ctx.reg_alloc.WriteS(inst);
|
||||
RegAlloc::Realize(Sresult);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.LDR(Sresult, Xstate, offsetof(A32JitState, ext_regs) + sizeof(u32) * index);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetVector>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg));
|
||||
|
||||
if (A32::IsDoubleExtReg(reg)) {
|
||||
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0);
|
||||
auto Dresult = ctx.reg_alloc.WriteD(inst);
|
||||
RegAlloc::Realize(Dresult);
|
||||
code.LDR(Dresult, Xstate, offsetof(A32JitState, ext_regs) + sizeof(u64) * index);
|
||||
} else {
|
||||
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::Q0);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
RegAlloc::Realize(Qresult);
|
||||
code.LDR(Qresult, Xstate, offsetof(A32JitState, ext_regs) + 2 * sizeof(u64) * index);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetExtendedRegister64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
ASSERT(A32::IsDoubleExtReg(reg));
|
||||
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0);
|
||||
|
||||
auto Dresult = ctx.reg_alloc.WriteD(inst);
|
||||
RegAlloc::Realize(Dresult);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.LDR(Dresult, Xstate, offsetof(A32JitState, ext_regs) + 2 * sizeof(u32) * index);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetRegister>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[1]);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.STR(Wvalue, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetExtendedRegister32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
ASSERT(A32::IsSingleExtReg(reg));
|
||||
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::S0);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Svalue = ctx.reg_alloc.ReadS(args[1]);
|
||||
RegAlloc::Realize(Svalue);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.STR(Svalue, Xstate, offsetof(A32JitState, ext_regs) + sizeof(u32) * index);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetExtendedRegister64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
ASSERT(A32::IsDoubleExtReg(reg));
|
||||
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Dvalue = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Dvalue);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.STR(Dvalue, Xstate, offsetof(A32JitState, ext_regs) + 2 * sizeof(u32) * index);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetVector>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg));
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (A32::IsDoubleExtReg(reg)) {
|
||||
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0);
|
||||
auto Dvalue = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Dvalue);
|
||||
code.STR(Dvalue, Xstate, offsetof(A32JitState, ext_regs) + sizeof(u64) * index);
|
||||
} else {
|
||||
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::Q0);
|
||||
auto Qvalue = ctx.reg_alloc.ReadQ(args[1]);
|
||||
RegAlloc::Realize(Qvalue);
|
||||
code.STR(Qvalue, Xstate, offsetof(A32JitState, ext_regs) + 2 * sizeof(u64) * index);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetCpsr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wcpsr = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wcpsr);
|
||||
|
||||
static_assert(offsetof(A32JitState, cpsr_nzcv) + sizeof(u32) == offsetof(A32JitState, cpsr_q));
|
||||
|
||||
code.LDP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
code.LDR(Wcpsr, Xstate, offsetof(A32JitState, cpsr_jaifm));
|
||||
code.ORR(Wcpsr, Wcpsr, Wscratch0);
|
||||
code.ORR(Wcpsr, Wcpsr, Wscratch1);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_ge));
|
||||
code.AND(Wscratch0, Wscratch0, 0x80808080);
|
||||
code.MOV(Wscratch1, 0x00204081);
|
||||
code.MUL(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.AND(Wscratch0, Wscratch0, 0xf0000000);
|
||||
code.ORR(Wcpsr, Wcpsr, Wscratch0, LSR, 12);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.AND(Wscratch0, Wscratch0, 0b11);
|
||||
// 9 8 7 6 5
|
||||
// E T
|
||||
code.ORR(Wscratch0, Wscratch0, Wscratch0, LSL, 3);
|
||||
code.AND(Wscratch0, Wscratch0, 0x11111111);
|
||||
code.ORR(Wcpsr, Wcpsr, Wscratch0, LSL, 5);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wcpsr = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wcpsr);
|
||||
|
||||
// NZCV, Q flags
|
||||
code.AND(Wscratch0, Wcpsr, 0xF0000000);
|
||||
code.AND(Wscratch1, Wcpsr, 1 << 27);
|
||||
|
||||
static_assert(offsetof(A32JitState, cpsr_nzcv) + sizeof(u32) == offsetof(A32JitState, cpsr_q));
|
||||
code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
|
||||
// GE flags
|
||||
// this does the following:
|
||||
// cpsr_ge |= mcl::bit::get_bit<19>(cpsr) ? 0xFF000000 : 0;
|
||||
// cpsr_ge |= mcl::bit::get_bit<18>(cpsr) ? 0x00FF0000 : 0;
|
||||
// cpsr_ge |= mcl::bit::get_bit<17>(cpsr) ? 0x0000FF00 : 0;
|
||||
// cpsr_ge |= mcl::bit::get_bit<16>(cpsr) ? 0x000000FF : 0;
|
||||
code.UBFX(Wscratch0, Wcpsr, 16, 4);
|
||||
code.MOV(Wscratch1, 0x00204081);
|
||||
code.MUL(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.AND(Wscratch0, Wscratch0, 0x01010101);
|
||||
code.LSL(Wscratch1, Wscratch0, 8);
|
||||
code.SUB(Wscratch0, Wscratch1, Wscratch0);
|
||||
|
||||
// Other flags
|
||||
code.MOV(Wscratch1, 0x010001DF);
|
||||
code.AND(Wscratch1, Wcpsr, Wscratch1);
|
||||
|
||||
static_assert(offsetof(A32JitState, cpsr_jaifm) + sizeof(u32) == offsetof(A32JitState, cpsr_ge));
|
||||
code.STP(Wscratch1, Wscratch0, Xstate, offsetof(A32JitState, cpsr_jaifm));
|
||||
|
||||
// IT state
|
||||
code.AND(Wscratch0, Wcpsr, 0xFC00);
|
||||
code.LSR(Wscratch1, Wcpsr, 17);
|
||||
code.AND(Wscratch1, Wscratch1, 0x300);
|
||||
code.ORR(Wscratch0, Wscratch0, Wscratch1);
|
||||
|
||||
// E flag, T flag
|
||||
code.LSR(Wscratch1, Wcpsr, 8);
|
||||
code.AND(Wscratch1, Wscratch1, 0x2);
|
||||
code.ORR(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.LDR(Wscratch1, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.BFXIL(Wscratch0, Wcpsr, 5, 1);
|
||||
code.AND(Wscratch1, Wscratch1, 0xFFFF0000);
|
||||
code.ORR(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.STR(Wnzcv, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.STR(Wnzcv, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZCVQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
static_assert(offsetof(A32JitState, cpsr_nzcv) + sizeof(u32) == offsetof(A32JitState, cpsr_q));
|
||||
|
||||
code.AND(Wscratch0, Wnzcv, 0xf000'0000);
|
||||
code.AND(Wscratch1, Wnzcv, 0x0800'0000);
|
||||
code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Wnz = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnz);
|
||||
|
||||
// TODO: Track latent value
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
code.AND(Wscratch0, Wscratch0, 0x30000000);
|
||||
code.ORR(Wscratch0, Wscratch0, Wnz);
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
// TODO: Track latent value
|
||||
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[1].IsImmediate()) {
|
||||
const u32 carry = args[1].GetImmediateU1() ? 0x2000'0000 : 0;
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
code.AND(Wscratch0, Wscratch0, 0x10000000);
|
||||
if (carry) {
|
||||
code.ORR(Wscratch0, Wscratch0, carry);
|
||||
}
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
} else {
|
||||
auto Wc = ctx.reg_alloc.ReadW(args[1]);
|
||||
RegAlloc::Realize(Wc);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
code.AND(Wscratch0, Wscratch0, 0x10000000);
|
||||
code.ORR(Wscratch0, Wscratch0, Wc);
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
}
|
||||
} else {
|
||||
if (args[1].IsImmediate()) {
|
||||
const u32 carry = args[1].GetImmediateU1() ? 0x2000'0000 : 0;
|
||||
auto Wnz = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnz);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
code.AND(Wscratch0, Wscratch0, 0x10000000);
|
||||
code.ORR(Wscratch0, Wscratch0, Wnz);
|
||||
if (carry) {
|
||||
code.ORR(Wscratch0, Wscratch0, carry);
|
||||
}
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
} else {
|
||||
auto Wnz = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto Wc = ctx.reg_alloc.ReadW(args[1]);
|
||||
RegAlloc::Realize(Wnz, Wc);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
code.AND(Wscratch0, Wscratch0, 0x10000000);
|
||||
code.ORR(Wscratch0, Wscratch0, Wnz);
|
||||
code.ORR(Wscratch0, Wscratch0, Wc);
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetCFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wflag = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wflag);
|
||||
|
||||
code.LDR(Wflag, Xstate, offsetof(A32JitState, cpsr_nzcv));
|
||||
code.AND(Wflag, Wflag, 1 << 29);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32OrQFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wflag = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wflag);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_q));
|
||||
code.ORR(Wscratch0, Wscratch0, Wflag, LSL, 27);
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_q));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetGEFlags>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Snzcv = ctx.reg_alloc.WriteS(inst);
|
||||
RegAlloc::Realize(Snzcv);
|
||||
|
||||
code.LDR(Snzcv, Xstate, offsetof(A32JitState, cpsr_ge));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetGEFlags>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Snzcv = ctx.reg_alloc.ReadS(args[0]);
|
||||
RegAlloc::Realize(Snzcv);
|
||||
|
||||
code.STR(Snzcv, Xstate, offsetof(A32JitState, cpsr_ge));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetGEFlagsCompressed>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wge = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wge);
|
||||
|
||||
code.LSR(Wscratch0, Wge, 16);
|
||||
code.MOV(Wscratch1, 0x00204081);
|
||||
code.MUL(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.AND(Wscratch0, Wscratch0, 0x01010101);
|
||||
code.LSL(Wscratch1, Wscratch0, 8);
|
||||
code.SUB(Wscratch0, Wscratch1, Wscratch0);
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, cpsr_ge));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32BXWritePC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const u32 upper_without_t = (A32::LocationDescriptor{ctx.block.EndLocation()}.SetSingleStepping(false).UniqueHash() >> 32) & 0xFFFFFFFE;
|
||||
|
||||
static_assert(offsetof(A32JitState, regs) + 16 * sizeof(u32) == offsetof(A32JitState, upper_location_descriptor));
|
||||
|
||||
if (args[0].IsImmediate()) {
|
||||
const u32 new_pc = args[0].GetImmediateU32();
|
||||
const u32 mask = mcl::bit::get_bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC;
|
||||
const u32 new_upper = upper_without_t | (mcl::bit::get_bit<0>(new_pc) ? 1 : 0);
|
||||
|
||||
code.MOV(Xscratch0, (u64{new_upper} << 32) | (new_pc & mask));
|
||||
code.STUR(Xscratch0, Xstate, offsetof(A32JitState, regs) + 15 * sizeof(u32));
|
||||
} else {
|
||||
auto Wpc = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wpc);
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
|
||||
code.ANDS(Wscratch0, Wpc, 1);
|
||||
code.MOV(Wscratch1, 3);
|
||||
code.CSEL(Wscratch1, Wscratch0, Wscratch1, NE);
|
||||
code.BIC(Wscratch1, Wpc, Wscratch1);
|
||||
code.MOV(Wscratch0, upper_without_t);
|
||||
code.CINC(Wscratch0, Wscratch0, NE);
|
||||
code.STP(Wscratch1, Wscratch0, Xstate, offsetof(A32JitState, regs) + 15 * sizeof(u32));
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32UpdateUpperLocationDescriptor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
|
||||
for (auto& inst : ctx.block) {
|
||||
if (inst.GetOpcode() == IR::Opcode::A32BXWritePC) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
EmitSetUpperLocationDescriptor(code, ctx, ctx.block.EndLocation(), ctx.block.Location());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
}
|
||||
|
||||
code.MOV(W1, args[0].GetImmediateU32());
|
||||
EmitRelocation(code, ctx, LinkTarget::CallSVC);
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
}
|
||||
|
||||
code.MOV(W1, args[0].GetImmediateU32());
|
||||
code.MOV(W2, args[1].GetImmediateU32());
|
||||
EmitRelocation(code, ctx, LinkTarget::ExceptionRaised);
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.DSB(oaknut::BarrierOp::SY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.DMB(oaknut::BarrierOp::SY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
|
||||
if (!ctx.conf.hook_isb) {
|
||||
return;
|
||||
}
|
||||
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetFpscr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wfpscr = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wfpscr);
|
||||
ctx.fpsr.Spill();
|
||||
|
||||
static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv));
|
||||
|
||||
code.LDR(Wfpscr, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.LDP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr));
|
||||
code.AND(Wfpscr, Wfpscr, 0xffff'0000);
|
||||
code.ORR(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.ORR(Wfpscr, Wfpscr, Wscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetFpscr>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wfpscr = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wfpscr);
|
||||
ctx.fpsr.Overwrite();
|
||||
|
||||
static_assert(offsetof(A32JitState, fpsr) + sizeof(u32) == offsetof(A32JitState, fpsr_nzcv));
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.MOV(Wscratch1, 0x07f7'0000);
|
||||
code.AND(Wscratch1, Wfpscr, Wscratch1);
|
||||
code.AND(Wscratch0, Wscratch0, 0x0000'ffff);
|
||||
code.ORR(Wscratch0, Wscratch0, Wscratch1);
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
|
||||
code.MOV(Wscratch0, 0x0800'009f);
|
||||
code.AND(Wscratch0, Wfpscr, Wscratch0);
|
||||
code.AND(Wscratch1, Wfpscr, 0xf000'0000);
|
||||
code.STP(Wscratch0, Wscratch1, Xstate, offsetof(A32JitState, fpsr));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetFpscrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wnzcv = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.LDR(Wnzcv, Xstate, offsetof(A32JitState, fpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetFpscrNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.STR(Wnzcv, Xstate, offsetof(A32JitState, fpsr_nzcv));
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,299 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/interface/A32/coprocessor.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Placeholder for raising a coprocessor exception: no code is emitted, the helper
// only trips an assertion at JIT-compile time.
static void EmitCoprocessorException() {
    ASSERT_FALSE("Should raise coproc exception here");
}
|
||||
|
||||
// Emits an indirect call to a coprocessor callback.
//   callback   - function pointer plus optional user argument; when the user argument
//                is present it is loaded into X0 before the call.
//   inst       - when non-null, X0 after the call is defined as the result of `inst`.
//   arg0, arg1 - optional IR arguments handed to PrepareForCall after an empty first slot.
static void CallCoprocCallback(oaknut::CodeGenerator& code, EmitContext& ctx, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
    ctx.reg_alloc.PrepareForCall({}, arg0, arg1);

    if (callback.user_arg) {
        code.MOV(X0, reinterpret_cast<u64>(*callback.user_arg));
    }

    // Call through a scratch register holding the absolute callback address.
    code.MOV(Xscratch0, reinterpret_cast<u64>(callback.function));
    code.BLR(Xscratch0);

    if (inst != nullptr) {
        ctx.reg_alloc.DefineAsRegister(inst, X0);
    }
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocInternalOperation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
|
||||
const size_t coproc_num = coproc_info[0];
|
||||
const bool two = coproc_info[1] != 0;
|
||||
const auto opc1 = static_cast<unsigned>(coproc_info[2]);
|
||||
const auto CRd = static_cast<A32::CoprocReg>(coproc_info[3]);
|
||||
const auto CRn = static_cast<A32::CoprocReg>(coproc_info[4]);
|
||||
const auto CRm = static_cast<A32::CoprocReg>(coproc_info[5]);
|
||||
const auto opc2 = static_cast<unsigned>(coproc_info[6]);
|
||||
|
||||
std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
|
||||
if (!coproc) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
const auto action = coproc->CompileInternalOperation(two, opc1, CRd, CRn, CRm, opc2);
|
||||
if (!action) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
CallCoprocCallback(code, ctx, *action);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocSendOneWord>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
|
||||
const size_t coproc_num = coproc_info[0];
|
||||
const bool two = coproc_info[1] != 0;
|
||||
const auto opc1 = static_cast<unsigned>(coproc_info[2]);
|
||||
const auto CRn = static_cast<A32::CoprocReg>(coproc_info[3]);
|
||||
const auto CRm = static_cast<A32::CoprocReg>(coproc_info[4]);
|
||||
const auto opc2 = static_cast<unsigned>(coproc_info[5]);
|
||||
|
||||
std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
|
||||
if (!coproc) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
const auto action = coproc->CompileSendOneWord(two, opc1, CRn, CRm, opc2);
|
||||
|
||||
if (std::holds_alternative<std::monostate>(action)) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto cb = std::get_if<A32::Coprocessor::Callback>(&action)) {
|
||||
CallCoprocCallback(code, ctx, *cb, nullptr, args[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto destination_ptr = std::get_if<u32*>(&action)) {
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[1]);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
|
||||
code.MOV(Xscratch0, reinterpret_cast<u64>(*destination_ptr));
|
||||
code.STR(Wvalue, Xscratch0);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocSendTwoWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
|
||||
const size_t coproc_num = coproc_info[0];
|
||||
const bool two = coproc_info[1] != 0;
|
||||
const auto opc = static_cast<unsigned>(coproc_info[2]);
|
||||
const auto CRm = static_cast<A32::CoprocReg>(coproc_info[3]);
|
||||
|
||||
std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
|
||||
if (!coproc) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
const auto action = coproc->CompileSendTwoWords(two, opc, CRm);
|
||||
|
||||
if (std::holds_alternative<std::monostate>(action)) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto cb = std::get_if<A32::Coprocessor::Callback>(&action)) {
|
||||
CallCoprocCallback(code, ctx, *cb, nullptr, args[1], args[2]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto destination_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
|
||||
auto Wvalue1 = ctx.reg_alloc.ReadW(args[1]);
|
||||
auto Wvalue2 = ctx.reg_alloc.ReadW(args[2]);
|
||||
RegAlloc::Realize(Wvalue1, Wvalue2);
|
||||
|
||||
code.MOV(Xscratch0, reinterpret_cast<u64>((*destination_ptrs)[0]));
|
||||
code.MOV(Xscratch1, reinterpret_cast<u64>((*destination_ptrs)[1]));
|
||||
code.STR(Wvalue1, Xscratch0);
|
||||
code.STR(Wvalue2, Xscratch1);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocGetOneWord>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
|
||||
|
||||
const size_t coproc_num = coproc_info[0];
|
||||
const bool two = coproc_info[1] != 0;
|
||||
const auto opc1 = static_cast<unsigned>(coproc_info[2]);
|
||||
const auto CRn = static_cast<A32::CoprocReg>(coproc_info[3]);
|
||||
const auto CRm = static_cast<A32::CoprocReg>(coproc_info[4]);
|
||||
const auto opc2 = static_cast<unsigned>(coproc_info[5]);
|
||||
|
||||
std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
|
||||
if (!coproc) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
const auto action = coproc->CompileGetOneWord(two, opc1, CRn, CRm, opc2);
|
||||
|
||||
if (std::holds_alternative<std::monostate>(action)) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto cb = std::get_if<A32::Coprocessor::Callback>(&action)) {
|
||||
CallCoprocCallback(code, ctx, *cb, inst);
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto source_ptr = std::get_if<u32*>(&action)) {
|
||||
auto Wvalue = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
|
||||
code.MOV(Xscratch0, reinterpret_cast<u64>(*source_ptr));
|
||||
code.LDR(Wvalue, Xscratch0);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocGetTwoWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
|
||||
const size_t coproc_num = coproc_info[0];
|
||||
const bool two = coproc_info[1] != 0;
|
||||
const unsigned opc = coproc_info[2];
|
||||
const auto CRm = static_cast<A32::CoprocReg>(coproc_info[3]);
|
||||
|
||||
std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
|
||||
if (!coproc) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
auto action = coproc->CompileGetTwoWords(two, opc, CRm);
|
||||
|
||||
if (std::holds_alternative<std::monostate>(action)) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto cb = std::get_if<A32::Coprocessor::Callback>(&action)) {
|
||||
CallCoprocCallback(code, ctx, *cb, inst);
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto source_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
|
||||
auto Xvalue = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
|
||||
code.MOV(Xscratch0, reinterpret_cast<u64>((*source_ptrs)[0]));
|
||||
code.MOV(Xscratch1, reinterpret_cast<u64>((*source_ptrs)[1]));
|
||||
code.LDR(Xvalue, Xscratch0);
|
||||
code.LDR(Wscratch1, Xscratch1);
|
||||
code.BFI(Xvalue, Xscratch1, 32, 32);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocLoadWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
|
||||
const size_t coproc_num = coproc_info[0];
|
||||
const bool two = coproc_info[1] != 0;
|
||||
const bool long_transfer = coproc_info[2] != 0;
|
||||
const auto CRd = static_cast<A32::CoprocReg>(coproc_info[3]);
|
||||
const bool has_option = coproc_info[4] != 0;
|
||||
|
||||
std::optional<u8> option = std::nullopt;
|
||||
if (has_option) {
|
||||
option = coproc_info[5];
|
||||
}
|
||||
|
||||
std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
|
||||
if (!coproc) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
const auto action = coproc->CompileLoadWords(two, long_transfer, CRd, option);
|
||||
if (!action) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
CallCoprocCallback(code, ctx, *action, nullptr, args[1]);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocStoreWords>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const auto coproc_info = inst->GetArg(0).GetCoprocInfo();
|
||||
const size_t coproc_num = coproc_info[0];
|
||||
const bool two = coproc_info[1] != 0;
|
||||
const bool long_transfer = coproc_info[2] != 0;
|
||||
const auto CRd = static_cast<A32::CoprocReg>(coproc_info[3]);
|
||||
const bool has_option = coproc_info[4] != 0;
|
||||
|
||||
std::optional<u8> option = std::nullopt;
|
||||
if (has_option) {
|
||||
option = coproc_info[5];
|
||||
}
|
||||
|
||||
std::shared_ptr<A32::Coprocessor> coproc = ctx.conf.coprocessors[coproc_num];
|
||||
if (!coproc) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
const auto action = coproc->CompileStoreWords(two, long_transfer, CRd, option);
|
||||
if (!action) {
|
||||
EmitCoprocessorException();
|
||||
return;
|
||||
}
|
||||
|
||||
CallCoprocCallback(code, ctx, *action, nullptr, args[1]);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64_memory.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ClearExclusive>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.STR(WZR, Xstate, offsetof(A32JitState, exclusive_state));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32WriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32WriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32WriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32WriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
519
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
Normal file
519
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_a64.cpp
Normal file
|
|
@ -0,0 +1,519 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/interface/halt_reason.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Emits a conditional branch on `cond`: the stored cpsr_nzcv word is moved into the host
// NZCV system register and a B.<cond> to the returned label is emitted. The caller is
// responsible for binding the returned label.
oaknut::Label EmitA64Cond(oaknut::CodeGenerator& code, EmitContext&, IR::Cond cond) {
    oaknut::Label cond_passed;
    // TODO: Flags in host flags
    code.LDR(Wscratch0, Xstate, offsetof(A64JitState, cpsr_nzcv));
    code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
    code.B(static_cast<oaknut::Cond>(cond), cond_passed);
    return cond_passed;
}
|
||||
|
||||
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
|
||||
|
||||
// Interpret terminal: not supported by this backend; reaching it trips an assertion.
void EmitA64Terminal(oaknut::CodeGenerator&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) {
    ASSERT_FALSE("Interpret should never be emitted.");
}
|
||||
|
||||
// ReturnToDispatch terminal: emits a relocation to the ReturnToDispatcher link target.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// LinkBlock terminal: when block linking is enabled and we are not single-stepping,
// emits a guarded direct branch to the next block. The guard is "ticks remaining > 0"
// with cycle counting, or "halt word is zero" without it. The fall-back path stores the
// next PC into the JIT state and returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
    oaknut::Label slow_path;

    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        if (ctx.conf.enable_cycle_counting) {
            code.CMP(Xticks, 0);
            code.B(LE, slow_path);
        } else {
            code.LDAR(Wscratch0, Xhalt);
            code.CBNZ(Wscratch0, slow_path);
        }
        EmitBlockLinkRelocation(code, ctx, terminal.next, BlockRelocationType::Branch);
    }

    code.l(slow_path);
    code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC());
    code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// LinkBlockFast terminal: like LinkBlock but without any guard in front of the block
// link. The "store next PC and return to the dispatcher" sequence is always emitted
// after it.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
    const bool may_link = ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step;
    if (may_link) {
        EmitBlockLinkRelocation(code, ctx, terminal.next, BlockRelocationType::Branch);
    }

    code.MOV(Xscratch0, A64::LocationDescriptor{terminal.next}.PC());
    code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// PopRSBHint terminal: with the return-stack-buffer optimisation enabled (and not
// single-stepping), builds a location key from the current fpcr and pc, compares it with
// the entry at the current RSB index and branches to the entry's second word on a match.
// On a mismatch, or when the optimisation is off, control returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
    const bool use_rsb = ctx.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step;
    if (!use_rsb) {
        EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
        return;
    }

    oaknut::Label rsb_miss;

    // X0 = ((fpcr & fpcr_mask) << fpcr_shift) | (pc & pc_mask)
    code.MOV(Wscratch0, A64::LocationDescriptor::fpcr_mask);
    code.LDR(W0, Xstate, offsetof(A64JitState, fpcr));
    code.LDR(X1, Xstate, offsetof(A64JitState, pc));
    code.AND(W0, W0, Wscratch0);
    code.AND(X1, X1, A64::LocationDescriptor::pc_mask);
    code.LSL(X0, X0, A64::LocationDescriptor::fpcr_shift);
    code.ORR(X0, X0, X1);

    // X2 = SP + masked rsb_ptr; the pointer is then moved back by one entry and stored.
    code.LDR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));
    code.AND(Wscratch2, Wscratch2, RSBIndexMask);
    code.ADD(X2, SP, Xscratch2);
    code.SUB(Wscratch2, Wscratch2, sizeof(RSBEntry));
    code.STR(Wscratch2, SP, offsetof(StackLayout, rsb_ptr));

    // Load the two words of the selected entry.
    code.LDP(Xscratch0, Xscratch1, X2, offsetof(StackLayout, rsb));

    code.CMP(X0, Xscratch0);
    code.B(NE, rsb_miss);
    code.BR(Xscratch1);

    code.l(rsb_miss);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// FastDispatchHint terminal: currently just returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
    // TODO: Implement FastDispatchHint optimization
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// If terminal: emits the condition test, then the else-terminal as the fall-through
// path, then binds the condition label and emits the then-terminal.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label cond_passed = EmitA64Cond(code, ctx, terminal.if_);

    // Fall-through: condition failed.
    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    // Branch target: condition passed.
    code.l(cond_passed);
    EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step);
}
|
||||
|
||||
// CheckBit terminal: loads the check_bit byte from the stack layout; a non-zero byte
// runs the then-terminal, a zero byte branches to the else-terminal.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label bit_clear;

    code.LDRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
    code.CBZ(Wscratch0, bit_clear);

    EmitA64Terminal(code, ctx, terminal.then_, initial_location, is_single_step);

    code.l(bit_clear);
    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
}
|
||||
|
||||
// CheckHalt terminal: acquire-loads the halt word; when it is zero the nested
// else-terminal runs, otherwise control returns to the dispatcher.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    oaknut::Label halt_requested;

    code.LDAR(Wscratch0, Xhalt);
    code.CBNZ(Wscratch0, halt_requested);

    EmitA64Terminal(code, ctx, terminal.else_, initial_location, is_single_step);

    code.l(halt_requested);
    EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
|
||||
|
||||
// Generic entry point: dispatches on the alternative currently held by the terminal
// variant and forwards to the matching EmitA64Terminal overload.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    boost::apply_visitor(
        [&](const auto& concrete_terminal) {
            EmitA64Terminal(code, ctx, concrete_terminal, initial_location, is_single_step);
        },
        terminal);
}
|
||||
|
||||
// Emits the terminal of the block currently being compiled. The single-stepping flag is
// stripped from the location that is passed on and handed over separately as a bool.
void EmitA64Terminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A64::LocationDescriptor block_location{ctx.block.Location()};
    const bool single_stepping = block_location.SingleStepping();

    EmitA64Terminal(code, ctx, ctx.block.GetTerminal(), block_location.SetSingleStepping(false), single_stepping);
}
|
||||
|
||||
// Emits the terminal used when the block's condition fails: a LinkBlock terminal to the
// block's condition-failed location.
void EmitA64ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx) {
    const A64::LocationDescriptor block_location{ctx.block.Location()};
    const bool single_stepping = block_location.SingleStepping();

    EmitA64Terminal(code, ctx, IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, block_location.SetSingleStepping(false), single_stepping);
}
|
||||
|
||||
// Emits a post-memory-access check for a pending memory abort.
//   inst - the memory instruction; its argument 0 is read as a u64 location descriptor.
//   end  - label branched to when no memory abort is pending.
// Nothing is emitted unless conf.check_halt_on_memory_access is set. When the
// MemoryAbort bit is set in the halt word, the instruction's PC is written to the JIT
// state and control leaves through the ReturnFromRunCode link target.
void EmitA64CheckMemoryAbort(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, oaknut::Label& end) {
    if (!ctx.conf.check_halt_on_memory_access) {
        return;
    }

    const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

    // The halt word is read as 32 bits, matching the other readers of Xhalt in this
    // file (the LinkBlock and CheckHalt terminals) and the u32 mask tested below; a
    // 64-bit acquire load would read past the word.
    code.LDAR(Wscratch0, Xhalt);
    code.TST(Wscratch0, static_cast<u32>(HaltReason::MemoryAbort));
    code.B(EQ, end);
    code.MOV(Xscratch0, current_location.PC());
    code.STR(Xscratch0, Xstate, offsetof(A64JitState, pc));
    EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetCheckBit>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (args[0].IsImmediate()) {
|
||||
if (args[0].GetImmediateU1()) {
|
||||
code.MOV(Wscratch0, 1);
|
||||
code.STRB(Wscratch0, SP, offsetof(StackLayout, check_bit));
|
||||
} else {
|
||||
code.STRB(WZR, SP, offsetof(StackLayout, check_bit));
|
||||
}
|
||||
} else {
|
||||
auto Wbit = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wbit);
|
||||
code.STRB(Wbit, SP, offsetof(StackLayout, check_bit));
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCFlag>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wflag = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wflag);
|
||||
code.LDR(Wflag, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||
code.AND(Wflag, Wflag, 1 << 29);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wnzcv = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.LDR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetNZCVRaw>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetNZCV>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wnzcv = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wnzcv);
|
||||
|
||||
code.STR(Wnzcv, Xstate, offsetof(A64JitState, cpsr_nzcv));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wresult);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.LDR(Wresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xresult);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.LDR(Xresult, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Sresult = ctx.reg_alloc.WriteS(inst);
|
||||
RegAlloc::Realize(Sresult);
|
||||
code.LDR(Sresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Dresult = ctx.reg_alloc.WriteD(inst);
|
||||
RegAlloc::Realize(Dresult);
|
||||
code.LDR(Dresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
RegAlloc::Realize(Qresult);
|
||||
code.LDR(Qresult, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xresult);
|
||||
|
||||
code.LDR(Xresult, Xstate, offsetof(A64JitState, sp));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wresult);
|
||||
|
||||
code.LDR(Wresult, Xstate, offsetof(A64JitState, fpcr));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetFPSR>(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wresult);
|
||||
|
||||
ctx.fpsr.GetFpsr(Wresult);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetW>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[1]);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
code.MOV(*Wvalue, Wvalue);
|
||||
code.STR(Wvalue->toX(), Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetX>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[1]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
code.STR(Xvalue, Xstate, offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetS>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Svalue = ctx.reg_alloc.ReadS(args[1]);
|
||||
RegAlloc::Realize(Svalue);
|
||||
|
||||
code.FMOV(Svalue, Svalue);
|
||||
code.STR(Svalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetD>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Dvalue = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Dvalue);
|
||||
|
||||
code.FMOV(Dvalue, Dvalue);
|
||||
code.STR(Dvalue->toQ(), Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
|
||||
auto Qvalue = ctx.reg_alloc.ReadQ(args[1]);
|
||||
RegAlloc::Realize(Qvalue);
|
||||
code.STR(Qvalue, Xstate, offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetSP>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.STR(Xvalue, Xstate, offsetof(A64JitState, sp));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetFPCR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
code.STR(Wvalue, Xstate, offsetof(A64JitState, fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Wvalue->toX());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetFPSR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wvalue = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
code.STR(Wvalue, Xstate, offsetof(A64JitState, fpsr));
|
||||
code.MSR(oaknut::SystemReg::FPSR, Wvalue->toX());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetPC>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.STR(Xvalue, Xstate, offsetof(A64JitState, pc));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64CallSupervisor>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
}
|
||||
|
||||
code.MOV(W1, args[0].GetImmediateU32());
|
||||
EmitRelocation(code, ctx, LinkTarget::CallSVC);
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExceptionRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
}
|
||||
|
||||
code.MOV(X1, args[0].GetImmediateU64());
|
||||
code.MOV(X2, args[1].GetImmediateU64());
|
||||
EmitRelocation(code, ctx, LinkTarget::ExceptionRaised);
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
|
||||
EmitRelocation(code, ctx, LinkTarget::DataCacheOperationRaised);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64InstructionCacheOperationRaised>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[0], args[1]);
|
||||
EmitRelocation(code, ctx, LinkTarget::InstructionCacheOperationRaised);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.DSB(oaknut::BarrierOp::SY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataMemoryBarrier>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.DMB(oaknut::BarrierOp::SY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst*) {
|
||||
if (!ctx.conf.hook_isb) {
|
||||
return;
|
||||
}
|
||||
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
EmitRelocation(code, ctx, LinkTarget::InstructionSynchronizationBarrierRaised);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCNTFRQ>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Xvalue = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.MOV(Xvalue, ctx.conf.cntfreq_el0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCNTPCT>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
ctx.reg_alloc.PrepareForCall();
|
||||
if (!ctx.conf.wall_clock_cntpct && ctx.conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
EmitRelocation(code, ctx, LinkTarget::GetCNTPCT);
|
||||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCTR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wvalue = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
code.MOV(Wvalue, ctx.conf.ctr_el0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetDCZID>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Wvalue = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Wvalue);
|
||||
code.MOV(Wvalue, ctx.conf.dczid_el0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Xvalue = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
|
||||
code.LDR(Xvalue, Xscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetTPIDRRO>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto Xvalue = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidrro_el0));
|
||||
code.LDR(Xvalue, Xscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetTPIDR>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(ctx.conf.tpidr_el0));
|
||||
code.STR(Xvalue, Xscratch0);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64_memory.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/acc_type.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ClearExclusive>(oaknut::CodeGenerator& code, EmitContext&, IR::Inst*) {
|
||||
code.STR(WZR, Xstate, offsetof(A64JitState, exclusive_state));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitReadMemory<128>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveReadMemory<128>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitWriteMemory<128>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitExclusiveWriteMemory<128>(code, ctx, inst);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,166 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
template<size_t bitsize, typename EmitFn>
|
||||
static void EmitCRC(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit_fn) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Woutput = ctx.reg_alloc.WriteW(inst);
|
||||
auto Winput = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto Rdata = ctx.reg_alloc.ReadReg<bitsize>(args[1]);
|
||||
RegAlloc::Realize(Woutput, Winput, Rdata);
|
||||
|
||||
emit_fn(Woutput, Winput, Rdata);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32Castagnoli8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC<32>(code, ctx, inst, [&](auto& Woutput, auto& Winput, auto& Wdata) { code.CRC32CB(Woutput, Winput, Wdata); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32Castagnoli16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC<32>(code, ctx, inst, [&](auto& Woutput, auto& Winput, auto& Wdata) { code.CRC32CH(Woutput, Winput, Wdata); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32Castagnoli32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC<32>(code, ctx, inst, [&](auto& Woutput, auto& Winput, auto& Wdata) { code.CRC32CW(Woutput, Winput, Wdata); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32Castagnoli64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC<64>(code, ctx, inst, [&](auto& Woutput, auto& Winput, auto& Xdata) { code.CRC32CX(Woutput, Winput, Xdata); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32ISO8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC<32>(code, ctx, inst, [&](auto& Woutput, auto& Winput, auto& Wdata) { code.CRC32B(Woutput, Winput, Wdata); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32ISO16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC<32>(code, ctx, inst, [&](auto& Woutput, auto& Winput, auto& Wdata) { code.CRC32H(Woutput, Winput, Wdata); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32ISO32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC<32>(code, ctx, inst, [&](auto& Woutput, auto& Winput, auto& Wdata) { code.CRC32W(Woutput, Winput, Wdata); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32ISO64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC<64>(code, ctx, inst, [&](auto& Woutput, auto& Winput, auto& Xdata) { code.CRC32X(Woutput, Winput, Xdata); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AESDecryptSingleRound>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qoutput = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qinput = ctx.reg_alloc.ReadQ(args[0]);
|
||||
RegAlloc::Realize(Qoutput, Qinput);
|
||||
|
||||
code.MOVI(Qoutput->toD(), oaknut::RepImm{0});
|
||||
code.AESD(Qoutput->B16(), Qinput->B16());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AESEncryptSingleRound>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qoutput = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qinput = ctx.reg_alloc.ReadQ(args[0]);
|
||||
RegAlloc::Realize(Qoutput, Qinput);
|
||||
|
||||
code.MOVI(Qoutput->toD(), oaknut::RepImm{0});
|
||||
code.AESE(Qoutput->B16(), Qinput->B16());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AESInverseMixColumns>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qoutput = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qinput = ctx.reg_alloc.ReadQ(args[0]);
|
||||
RegAlloc::Realize(Qoutput, Qinput);
|
||||
|
||||
code.AESIMC(Qoutput->B16(), Qinput->B16());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AESMixColumns>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qoutput = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qinput = ctx.reg_alloc.ReadQ(args[0]);
|
||||
RegAlloc::Realize(Qoutput, Qinput);
|
||||
|
||||
code.AESMC(Qoutput->B16(), Qinput->B16());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SM4AccessSubstitutionBox>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SHA256Hash>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const bool part1 = args[3].GetImmediateU1();
|
||||
|
||||
if (part1) {
|
||||
auto Qx = ctx.reg_alloc.ReadWriteQ(args[0], inst);
|
||||
auto Qy = ctx.reg_alloc.ReadQ(args[1]);
|
||||
auto Qz = ctx.reg_alloc.ReadQ(args[2]);
|
||||
RegAlloc::Realize(Qx, Qy, Qz);
|
||||
|
||||
code.SHA256H(Qx, Qy, Qz->S4());
|
||||
} else {
|
||||
auto Qx = ctx.reg_alloc.ReadQ(args[0]);
|
||||
auto Qy = ctx.reg_alloc.ReadWriteQ(args[1], inst);
|
||||
auto Qz = ctx.reg_alloc.ReadQ(args[2]);
|
||||
RegAlloc::Realize(Qx, Qy, Qz);
|
||||
|
||||
code.SHA256H2(Qy, Qx, Qz->S4()); // Yes x and y are swapped
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SHA256MessageSchedule0>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qa = ctx.reg_alloc.ReadWriteQ(args[0], inst);
|
||||
auto Qb = ctx.reg_alloc.ReadQ(args[1]);
|
||||
RegAlloc::Realize(Qa, Qb);
|
||||
|
||||
code.SHA256SU0(Qa->S4(), Qb->S4());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SHA256MessageSchedule1>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qa = ctx.reg_alloc.ReadWriteQ(args[0], inst);
|
||||
auto Qb = ctx.reg_alloc.ReadQ(args[1]);
|
||||
auto Qc = ctx.reg_alloc.ReadQ(args[2]);
|
||||
RegAlloc::Realize(Qa, Qb, Qc);
|
||||
|
||||
code.SHA256SU1(Qa->S4(), Qb->S4(), Qc->S4());
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,801 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
template<size_t bitsize, typename EmitFn>
|
||||
static void EmitTwoOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteVec<bitsize>(inst);
|
||||
auto Voperand = ctx.reg_alloc.ReadVec<bitsize>(args[0]);
|
||||
RegAlloc::Realize(Vresult, Voperand);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
emit(Vresult, Voperand);
|
||||
}
|
||||
|
||||
template<size_t bitsize, typename EmitFn>
|
||||
static void EmitThreeOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteVec<bitsize>(inst);
|
||||
auto Va = ctx.reg_alloc.ReadVec<bitsize>(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadVec<bitsize>(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
emit(Vresult, Va, Vb);
|
||||
}
|
||||
|
||||
template<size_t bitsize, typename EmitFn>
|
||||
static void EmitFourOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteVec<bitsize>(inst);
|
||||
auto Va = ctx.reg_alloc.ReadVec<bitsize>(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadVec<bitsize>(args[1]);
|
||||
auto Vc = ctx.reg_alloc.ReadVec<bitsize>(args[2]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb, Vc);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
emit(Vresult, Va, Vb, Vc);
|
||||
}
|
||||
|
||||
template<size_t bitsize_from, size_t bitsize_to, typename EmitFn>
|
||||
static void EmitConvert(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vto = ctx.reg_alloc.WriteVec<bitsize_to>(inst);
|
||||
auto Vfrom = ctx.reg_alloc.ReadVec<bitsize_from>(args[0]);
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
|
||||
RegAlloc::Realize(Vto, Vfrom);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
ASSERT(rounding_mode == ctx.FPCR().RMode());
|
||||
|
||||
emit(Vto, Vfrom);
|
||||
}
|
||||
|
||||
template<size_t bitsize_from, size_t bitsize_to, bool is_signed>
|
||||
static void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Rto = ctx.reg_alloc.WriteReg<std::max<size_t>(bitsize_to, 32)>(inst);
|
||||
auto Vfrom = ctx.reg_alloc.ReadVec<bitsize_from>(args[0]);
|
||||
const size_t fbits = args[1].GetImmediateU8();
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
|
||||
RegAlloc::Realize(Rto, Vfrom);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
if (rounding_mode == FP::RoundingMode::TowardsZero) {
|
||||
if constexpr (is_signed) {
|
||||
if constexpr (bitsize_to == 16) {
|
||||
code.FCVTZS(Rto, Vfrom, fbits + 16);
|
||||
code.ASR(Wscratch0, Rto, 31);
|
||||
code.ADD(Rto, Rto, Wscratch0, LSR, 16); // Round towards zero when truncating
|
||||
code.LSR(Rto, Rto, 16);
|
||||
} else if (fbits) {
|
||||
code.FCVTZS(Rto, Vfrom, fbits);
|
||||
} else {
|
||||
code.FCVTZS(Rto, Vfrom);
|
||||
}
|
||||
} else {
|
||||
if constexpr (bitsize_to == 16) {
|
||||
code.FCVTZU(Rto, Vfrom, fbits + 16);
|
||||
code.LSR(Rto, Rto, 16);
|
||||
} else if (fbits) {
|
||||
code.FCVTZU(Rto, Vfrom, fbits);
|
||||
} else {
|
||||
code.FCVTZU(Rto, Vfrom);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ASSERT(fbits == 0);
|
||||
ASSERT(bitsize_to != 16);
|
||||
if constexpr (is_signed) {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
code.FCVTNS(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
code.FCVTPS(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
code.FCVTMS(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
code.FCVTZS(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
code.FCVTAS(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::ToOdd:
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RoundingMode");
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
code.FCVTNU(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
code.FCVTPU(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
code.FCVTMU(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
code.FCVTZU(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
code.FCVTAU(Rto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::ToOdd:
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RoundingMode");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t bitsize_from, size_t bitsize_to, typename EmitFn>
|
||||
static void EmitFromFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vto = ctx.reg_alloc.WriteVec<bitsize_to>(inst);
|
||||
auto Rfrom = ctx.reg_alloc.ReadReg<std::max<size_t>(bitsize_from, 32)>(args[0]);
|
||||
const size_t fbits = args[1].GetImmediateU8();
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
|
||||
RegAlloc::Realize(Vto, Rfrom);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
if (rounding_mode == ctx.FPCR().RMode()) {
|
||||
emit(Vto, Rfrom, fbits);
|
||||
} else {
|
||||
FP::FPCR new_fpcr = ctx.FPCR();
|
||||
new_fpcr.RMode(rounding_mode);
|
||||
|
||||
code.MOV(Wscratch0, new_fpcr.Value());
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
emit(Vto, Rfrom, fbits);
|
||||
|
||||
code.MOV(Wscratch0, ctx.FPCR().Value());
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAbs16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAbs32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Soperand) { code.FABS(Sresult, Soperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAbs64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Doperand) { code.FABS(Dresult, Doperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FADD(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FADD(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<size_t size>
|
||||
void EmitCompare(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto flags = ctx.reg_alloc.WriteFlags(inst);
|
||||
auto Va = ctx.reg_alloc.ReadVec<size>(args[0]);
|
||||
const bool exc_on_qnan = args[2].GetImmediateU1();
|
||||
|
||||
if (args[1].IsImmediate() && args[1].GetImmediateU64() == 0) {
|
||||
RegAlloc::Realize(flags, Va);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
if (exc_on_qnan) {
|
||||
code.FCMPE(Va, 0);
|
||||
} else {
|
||||
code.FCMP(Va, 0);
|
||||
}
|
||||
} else {
|
||||
auto Vb = ctx.reg_alloc.ReadVec<size>(args[1]);
|
||||
RegAlloc::Realize(flags, Va, Vb);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
if (exc_on_qnan) {
|
||||
code.FCMPE(Va, Vb);
|
||||
} else {
|
||||
code.FCMP(Va, Vb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPCompare32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCompare<32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPCompare64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCompare<64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDiv32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FDIV(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDiv64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FDIV(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMax32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FMAX(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMax64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FMAX(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMaxNumeric32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FMAXNM(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMaxNumeric64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FMAXNM(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMin32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FMIN(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMin64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FMIN(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMinNumeric32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FMINNM(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMinNumeric64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FMINNM(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMul32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FMUL(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMul64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FMUL(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFourOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& S1, auto& S2) { code.FMADD(Sresult, S1, S2, Sa); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFourOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& D1, auto& D2) { code.FMADD(Dresult, D1, D2, Da); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulSub16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFourOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& S1, auto& S2) { code.FMSUB(Sresult, S1, S2, Sa); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFourOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& D1, auto& D2) { code.FMSUB(Dresult, D1, D2, Da); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulX32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FMULX(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulX64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FMULX(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPNeg16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPNeg32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Soperand) { code.FNEG(Sresult, Soperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPNeg64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Doperand) { code.FNEG(Dresult, Doperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipEstimate16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipEstimate32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Soperand) { code.FRECPE(Sresult, Soperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipEstimate64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Doperand) { code.FRECPE(Dresult, Doperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipExponent16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipExponent32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Soperand) { code.FRECPX(Sresult, Soperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipExponent64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Doperand) { code.FRECPX(Dresult, Doperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipStepFused16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipStepFused32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FRECPS(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipStepFused64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FRECPS(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRoundInt16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRoundInt32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Sresult = ctx.reg_alloc.WriteS(inst);
|
||||
auto Soperand = ctx.reg_alloc.ReadS(args[0]);
|
||||
RegAlloc::Realize(Sresult, Soperand);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
if (exact) {
|
||||
ASSERT(ctx.FPCR().RMode() == rounding_mode);
|
||||
code.FRINTX(Sresult, Soperand);
|
||||
} else {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
code.FRINTN(Sresult, Soperand);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
code.FRINTP(Sresult, Soperand);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
code.FRINTM(Sresult, Soperand);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
code.FRINTZ(Sresult, Soperand);
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
code.FRINTA(Sresult, Soperand);
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RoundingMode");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRoundInt64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Dresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Doperand = ctx.reg_alloc.ReadD(args[0]);
|
||||
RegAlloc::Realize(Dresult, Doperand);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
if (exact) {
|
||||
ASSERT(ctx.FPCR().RMode() == rounding_mode);
|
||||
code.FRINTX(Dresult, Doperand);
|
||||
} else {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
code.FRINTN(Dresult, Doperand);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
code.FRINTP(Dresult, Doperand);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
code.FRINTM(Dresult, Doperand);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
code.FRINTZ(Dresult, Doperand);
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
code.FRINTA(Dresult, Doperand);
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RoundingMode");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtEstimate16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtEstimate32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Soperand) { code.FRSQRTE(Sresult, Soperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtEstimate64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Doperand) { code.FRSQRTE(Dresult, Doperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtStepFused16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtStepFused32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FRSQRTS(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtStepFused64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FRSQRTS(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSqrt32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Soperand) { code.FSQRT(Sresult, Soperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSqrt64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Doperand) { code.FSQRT(Dresult, Doperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<32>(code, ctx, inst, [&](auto& Sresult, auto& Sa, auto& Sb) { code.FSUB(Sresult, Sa, Sb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp<64>(code, ctx, inst, [&](auto& Dresult, auto& Da, auto& Db) { code.FSUB(Dresult, Da, Db); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitConvert<16, 64>(code, ctx, inst, [&](auto& Dto, auto& Hfrom) { code.FCVT(Dto, Hfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitConvert<16, 32>(code, ctx, inst, [&](auto& Sto, auto& Hfrom) { code.FCVT(Sto, Hfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitConvert<32, 64>(code, ctx, inst, [&](auto& Dto, auto& Sfrom) { code.FCVT(Dto, Sfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToHalf>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitConvert<32, 16>(code, ctx, inst, [&](auto& Hto, auto& Sfrom) { code.FCVT(Hto, Sfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToHalf>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitConvert<64, 16>(code, ctx, inst, [&](auto& Hto, auto& Dfrom) { code.FCVT(Hto, Dfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
|
||||
if (rounding_mode == FP::RoundingMode::ToOdd) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Sto = ctx.reg_alloc.WriteS(inst);
|
||||
auto Dfrom = ctx.reg_alloc.ReadD(args[0]);
|
||||
RegAlloc::Realize(Sto, Dfrom);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
code.FCVTXN(Sto, Dfrom);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
EmitConvert<64, 32>(code, ctx, inst, [&](auto& Sto, auto& Dfrom) { code.FCVT(Sto, Dfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<64, 16, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedS32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<64, 32, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedS64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
// TODO: Consider fpr source
|
||||
EmitToFixed<64, 64, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<64, 16, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedU32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<64, 32, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedU64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
// TODO: Consider fpr source
|
||||
EmitToFixed<64, 64, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedS32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedS64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedU32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedU64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<32, 16, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedS32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
// TODO: Consider fpr source
|
||||
EmitToFixed<32, 32, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedS64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<32, 64, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<32, 16, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedU32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
// TODO: Consider fpr source
|
||||
EmitToFixed<32, 32, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedU64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<32, 64, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU16ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<16, 32>(code, ctx, inst, [&](auto& Sto, auto& Wfrom, u8 fbits) {
|
||||
code.LSL(Wscratch0, Wfrom, 16);
|
||||
code.UCVTF(Sto, Wscratch0, fbits + 16);
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS16ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<16, 32>(code, ctx, inst, [&](auto& Sto, auto& Wfrom, u8 fbits) {
|
||||
code.LSL(Wscratch0, Wfrom, 16);
|
||||
code.SCVTF(Sto, Wscratch0, fbits + 16);
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU16ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<16, 64>(code, ctx, inst, [&](auto& Dto, auto& Wfrom, u8 fbits) {
|
||||
code.LSL(Wscratch0, Wfrom, 16);
|
||||
code.UCVTF(Dto, Wscratch0, fbits + 16);
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS16ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<16, 64>(code, ctx, inst, [&](auto& Dto, auto& Wfrom, u8 fbits) {
|
||||
code.LSL(Wscratch0, Wfrom, 16);
|
||||
code.SCVTF(Dto, Wscratch0, fbits + 16);
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU32ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
// TODO: Consider fpr source
|
||||
EmitFromFixed<32, 32>(code, ctx, inst, [&](auto& Sto, auto& Wfrom, u8 fbits) { fbits ? code.UCVTF(Sto, Wfrom, fbits) : code.UCVTF(Sto, Wfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS32ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
// TODO: Consider fpr source
|
||||
EmitFromFixed<32, 32>(code, ctx, inst, [&](auto& Sto, auto& Wfrom, u8 fbits) { fbits ? code.SCVTF(Sto, Wfrom, fbits) : code.SCVTF(Sto, Wfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU32ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<32, 64>(code, ctx, inst, [&](auto& Dto, auto& Wfrom, u8 fbits) { fbits ? code.UCVTF(Dto, Wfrom, fbits) : code.UCVTF(Dto, Wfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS32ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<32, 64>(code, ctx, inst, [&](auto& Dto, auto& Wfrom, u8 fbits) { fbits ? code.SCVTF(Dto, Wfrom, fbits) : code.SCVTF(Dto, Wfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU64ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
// TODO: Consider fpr source
|
||||
EmitFromFixed<64, 64>(code, ctx, inst, [&](auto& Dto, auto& Xfrom, u8 fbits) { fbits ? code.UCVTF(Dto, Xfrom, fbits) : code.UCVTF(Dto, Xfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU64ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<64, 32>(code, ctx, inst, [&](auto& Sto, auto& Xfrom, u8 fbits) { fbits ? code.UCVTF(Sto, Xfrom, fbits) : code.UCVTF(Sto, Xfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS64ToDouble>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
// TODO: Consider fpr source
|
||||
EmitFromFixed<64, 64>(code, ctx, inst, [&](auto& Dto, auto& Xfrom, u8 fbits) { fbits ? code.SCVTF(Dto, Xfrom, fbits) : code.SCVTF(Dto, Xfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS64ToSingle>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<64, 32>(code, ctx, inst, [&](auto& Sto, auto& Xfrom, u8 fbits) { fbits ? code.SCVTF(Sto, Xfrom, fbits) : code.SCVTF(Sto, Xfrom); });
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
683
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
Normal file
683
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
Normal file
|
|
@ -0,0 +1,683 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/emit_arm64_memory.h"
|
||||
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fastmem.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/acc_type.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns true when `acctype` is one of the ordered access types
// (ORDERED, ORDEREDRW or LIMITEDORDERED).
bool IsOrdered(IR::AccType acctype) {
    if (acctype == IR::AccType::ORDERED) {
        return true;
    }
    if (acctype == IR::AccType::ORDEREDRW) {
        return true;
    }
    return acctype == IR::AccType::LIMITEDORDERED;
}
|
||||
|
||||
// Maps an access width in bits (8/16/32/64/128) to the matching
// LinkTarget::ReadMemory* entry. Any other width is unreachable.
LinkTarget ReadMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::ReadMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::ReadMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::ReadMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::ReadMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::ReadMemory128;
    }
    UNREACHABLE();
}
|
||||
|
||||
// Maps an access width in bits (8/16/32/64/128) to the matching
// LinkTarget::WriteMemory* entry. Any other width is unreachable.
LinkTarget WriteMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::WriteMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::WriteMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::WriteMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::WriteMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::WriteMemory128;
    }
    UNREACHABLE();
}
|
||||
|
||||
// Maps an access width in bits (8/16/32/64/128) to the matching
// LinkTarget::WrappedReadMemory* entry. Any other width is unreachable.
LinkTarget WrappedReadMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::WrappedReadMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::WrappedReadMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::WrappedReadMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::WrappedReadMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::WrappedReadMemory128;
    }
    UNREACHABLE();
}
|
||||
|
||||
// Maps an access width in bits (8/16/32/64/128) to the matching
// LinkTarget::WrappedWriteMemory* entry. Any other width is unreachable.
LinkTarget WrappedWriteMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::WrappedWriteMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::WrappedWriteMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::WrappedWriteMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::WrappedWriteMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::WrappedWriteMemory128;
    }
    UNREACHABLE();
}
|
||||
|
||||
// Maps an access width in bits (8/16/32/64/128) to the matching
// LinkTarget::ExclusiveReadMemory* entry. Any other width is unreachable.
LinkTarget ExclusiveReadMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::ExclusiveReadMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::ExclusiveReadMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::ExclusiveReadMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::ExclusiveReadMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::ExclusiveReadMemory128;
    }
    UNREACHABLE();
}
|
||||
|
||||
// Maps an access width in bits (8/16/32/64/128) to the matching
// LinkTarget::ExclusiveWriteMemory* entry. Any other width is unreachable.
LinkTarget ExclusiveWriteMemoryLinkTarget(size_t bitsize) {
    if (bitsize == 8) {
        return LinkTarget::ExclusiveWriteMemory8;
    }
    if (bitsize == 16) {
        return LinkTarget::ExclusiveWriteMemory16;
    }
    if (bitsize == 32) {
        return LinkTarget::ExclusiveWriteMemory32;
    }
    if (bitsize == 64) {
        return LinkTarget::ExclusiveWriteMemory64;
    }
    if (bitsize == 128) {
        return LinkTarget::ExclusiveWriteMemory128;
    }
    UNREACHABLE();
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void CallbackOnlyEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1]);
|
||||
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
|
||||
|
||||
EmitRelocation(code, ctx, ReadMemoryLinkTarget(bitsize));
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
|
||||
if constexpr (bitsize == 128) {
|
||||
code.MOV(Q8.B16(), Q0.B16());
|
||||
ctx.reg_alloc.DefineAsRegister(inst, Q8);
|
||||
} else {
|
||||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void CallbackOnlyEmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1]);
|
||||
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
|
||||
|
||||
code.MOV(Wscratch0, 1);
|
||||
code.STRB(Wscratch0, Xstate, ctx.conf.state_exclusive_state_offset);
|
||||
EmitRelocation(code, ctx, ExclusiveReadMemoryLinkTarget(bitsize));
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
|
||||
if constexpr (bitsize == 128) {
|
||||
code.MOV(Q8.B16(), Q0.B16());
|
||||
ctx.reg_alloc.DefineAsRegister(inst, Q8);
|
||||
} else {
|
||||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void CallbackOnlyEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
|
||||
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
|
||||
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
EmitRelocation(code, ctx, WriteMemoryLinkTarget(bitsize));
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void CallbackOnlyEmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall({}, args[1], args[2]);
|
||||
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
|
||||
|
||||
oaknut::Label end;
|
||||
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
code.MOV(W0, 1);
|
||||
code.LDRB(Wscratch0, Xstate, ctx.conf.state_exclusive_state_offset);
|
||||
code.CBZ(Wscratch0, end);
|
||||
code.STRB(WZR, Xstate, ctx.conf.state_exclusive_state_offset);
|
||||
EmitRelocation(code, ctx, ExclusiveWriteMemoryLinkTarget(bitsize));
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
code.l(end);
|
||||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
|
||||
// Page geometry used by the inline page-table lookup: 2^12 = 4096-byte pages.
constexpr size_t page_bits = 12;
constexpr size_t page_size = size_t{1} << page_bits;
// Selects the offset-within-page bits of an address.
constexpr size_t page_mask = page_size - 1;
|
||||
|
||||
// This function may use Xscratch0 as a scratch register
|
||||
// Trashes NZCV
|
||||
template<size_t bitsize>
|
||||
void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
|
||||
static_assert(bitsize == 8 || bitsize == 16 || bitsize == 32 || bitsize == 64 || bitsize == 128);
|
||||
|
||||
if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
|
||||
const u64 align_mask = []() -> u64 {
|
||||
switch (bitsize) {
|
||||
case 16:
|
||||
return 0b1;
|
||||
case 32:
|
||||
return 0b11;
|
||||
case 64:
|
||||
return 0b111;
|
||||
case 128:
|
||||
return 0b1111;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
|
||||
code.TST(Xaddr, align_mask);
|
||||
code.B(NE, *fallback);
|
||||
} else {
|
||||
// If (addr & page_mask) > page_size - byte_size, use fallback.
|
||||
code.AND(Xscratch0, Xaddr, page_mask);
|
||||
code.CMP(Xscratch0, page_size - bitsize / 8);
|
||||
code.B(HI, *fallback);
|
||||
}
|
||||
}
|
||||
|
||||
// Outputs Xscratch0 = page_table[addr >> page_bits]
|
||||
// May use Xscratch1 as scratch register
|
||||
// Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
|
||||
// Trashes NZCV
|
||||
template<size_t bitsize>
|
||||
std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
|
||||
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
|
||||
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
|
||||
|
||||
EmitDetectMisalignedVAddr<bitsize>(code, ctx, Xaddr, fallback);
|
||||
|
||||
if (ctx.conf.silently_mirror_page_table || unused_top_bits == 0) {
|
||||
code.UBFX(Xscratch0, Xaddr, page_bits, valid_page_index_bits);
|
||||
} else {
|
||||
code.LSR(Xscratch0, Xaddr, page_bits);
|
||||
code.TST(Xscratch0, u64(~u64(0)) << valid_page_index_bits);
|
||||
code.B(NE, *fallback);
|
||||
}
|
||||
|
||||
code.LDR(Xscratch0, Xpagetable, Xscratch0, LSL, 3);
|
||||
|
||||
if (ctx.conf.page_table_pointer_mask_bits != 0) {
|
||||
const u64 mask = u64(~u64(0)) << ctx.conf.page_table_pointer_mask_bits;
|
||||
code.AND(Xscratch0, Xscratch0, mask);
|
||||
}
|
||||
|
||||
code.CBZ(Xscratch0, *fallback);
|
||||
|
||||
if (ctx.conf.absolute_offset_page_table) {
|
||||
return std::make_pair(Xscratch0, Xaddr);
|
||||
}
|
||||
code.AND(Xscratch1, Xaddr, page_mask);
|
||||
return std::make_pair(Xscratch0, Xscratch1);
|
||||
}
|
||||
|
||||
template<std::size_t bitsize>
|
||||
CodePtr EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
|
||||
const auto index_ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
|
||||
const auto add_ext = extend32 ? oaknut::AddSubExt::UXTW : oaknut::AddSubExt::LSL;
|
||||
const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
|
||||
|
||||
CodePtr fastmem_location = code.xptr<CodePtr>();
|
||||
|
||||
if (ordered) {
|
||||
code.ADD(Xscratch0, Xbase, Roffset, add_ext);
|
||||
|
||||
fastmem_location = code.xptr<CodePtr>();
|
||||
|
||||
switch (bitsize) {
|
||||
case 8:
|
||||
code.LDARB(oaknut::WReg{value_idx}, Xscratch0);
|
||||
break;
|
||||
case 16:
|
||||
code.LDARH(oaknut::WReg{value_idx}, Xscratch0);
|
||||
break;
|
||||
case 32:
|
||||
code.LDAR(oaknut::WReg{value_idx}, Xscratch0);
|
||||
break;
|
||||
case 64:
|
||||
code.LDAR(oaknut::XReg{value_idx}, Xscratch0);
|
||||
break;
|
||||
case 128:
|
||||
code.LDR(oaknut::QReg{value_idx}, Xscratch0);
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid bitsize");
|
||||
}
|
||||
} else {
|
||||
fastmem_location = code.xptr<CodePtr>();
|
||||
|
||||
switch (bitsize) {
|
||||
case 8:
|
||||
code.LDRB(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
case 16:
|
||||
code.LDRH(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
case 32:
|
||||
code.LDR(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
case 64:
|
||||
code.LDR(oaknut::XReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
case 128:
|
||||
code.LDR(oaknut::QReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid bitsize");
|
||||
}
|
||||
}
|
||||
|
||||
return fastmem_location;
|
||||
}
|
||||
|
||||
template<std::size_t bitsize>
|
||||
CodePtr EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
|
||||
const auto index_ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
|
||||
const auto add_ext = extend32 ? oaknut::AddSubExt::UXTW : oaknut::AddSubExt::LSL;
|
||||
const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
|
||||
|
||||
CodePtr fastmem_location;
|
||||
|
||||
if (ordered) {
|
||||
code.ADD(Xscratch0, Xbase, Roffset, add_ext);
|
||||
|
||||
fastmem_location = code.xptr<CodePtr>();
|
||||
|
||||
switch (bitsize) {
|
||||
case 8:
|
||||
code.STLRB(oaknut::WReg{value_idx}, Xscratch0);
|
||||
break;
|
||||
case 16:
|
||||
code.STLRH(oaknut::WReg{value_idx}, Xscratch0);
|
||||
break;
|
||||
case 32:
|
||||
code.STLR(oaknut::WReg{value_idx}, Xscratch0);
|
||||
break;
|
||||
case 64:
|
||||
code.STLR(oaknut::XReg{value_idx}, Xscratch0);
|
||||
break;
|
||||
case 128:
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
code.STR(oaknut::QReg{value_idx}, Xscratch0);
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid bitsize");
|
||||
}
|
||||
} else {
|
||||
fastmem_location = code.xptr<CodePtr>();
|
||||
|
||||
switch (bitsize) {
|
||||
case 8:
|
||||
code.STRB(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
case 16:
|
||||
code.STRH(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
case 32:
|
||||
code.STR(oaknut::WReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
case 64:
|
||||
code.STR(oaknut::XReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
case 128:
|
||||
code.STR(oaknut::QReg{value_idx}, Xbase, Roffset, index_ext);
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid bitsize");
|
||||
}
|
||||
}
|
||||
|
||||
return fastmem_location;
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void InlinePageTableEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
|
||||
auto Rvalue = [&] {
|
||||
if constexpr (bitsize == 128) {
|
||||
return ctx.reg_alloc.WriteQ(inst);
|
||||
} else {
|
||||
return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
|
||||
}
|
||||
}();
|
||||
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
|
||||
ctx.fpsr.Spill();
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
RegAlloc::Realize(Xaddr, Rvalue);
|
||||
|
||||
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
|
||||
|
||||
const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
|
||||
EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
|
||||
|
||||
ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
|
||||
code.l(*fallback);
|
||||
code.MOV(Xscratch0, Xaddr);
|
||||
EmitRelocation(code, ctx, WrappedReadMemoryLinkTarget(bitsize));
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
if constexpr (bitsize == 128) {
|
||||
code.MOV(Rvalue.B16(), Q0.B16());
|
||||
} else {
|
||||
code.MOV(Rvalue.toX(), Xscratch0);
|
||||
}
|
||||
ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
|
||||
code.B(*end);
|
||||
});
|
||||
|
||||
code.l(*end);
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
|
||||
auto Rvalue = [&] {
|
||||
if constexpr (bitsize == 128) {
|
||||
return ctx.reg_alloc.ReadQ(args[2]);
|
||||
} else {
|
||||
return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(args[2]);
|
||||
}
|
||||
}();
|
||||
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
|
||||
ctx.fpsr.Spill();
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
RegAlloc::Realize(Xaddr, Rvalue);
|
||||
|
||||
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
|
||||
|
||||
const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
|
||||
EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
|
||||
|
||||
ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
|
||||
code.l(*fallback);
|
||||
if constexpr (bitsize == 128) {
|
||||
code.MOV(Xscratch0, Xaddr);
|
||||
code.MOV(Q0.B16(), Rvalue.B16());
|
||||
} else {
|
||||
code.MOV(Xscratch0, Xaddr);
|
||||
code.MOV(Xscratch1, Rvalue.toX());
|
||||
}
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
EmitRelocation(code, ctx, WrappedWriteMemoryLinkTarget(bitsize));
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
|
||||
code.B(*end);
|
||||
});
|
||||
|
||||
code.l(*end);
|
||||
}
|
||||
|
||||
// Decides whether `inst` should be emitted with the fastmem path.
// Returns the (block location, instruction name) marker identifying this
// access when fastmem is configured, supported, and not vetoed for that
// marker; returns std::nullopt otherwise.
std::optional<DoNotFastmemMarker> ShouldFastmem(EmitContext& ctx, IR::Inst* inst) {
    if (!ctx.conf.fastmem_pointer) {
        return std::nullopt;
    }
    if (!ctx.fastmem.SupportsFastmem()) {
        return std::nullopt;
    }

    const auto marker = std::make_tuple(ctx.block.Location(), inst->GetName());
    if (!ctx.fastmem.ShouldFastmem(marker)) {
        return std::nullopt;
    }
    return marker;
}
|
||||
|
||||
// True when the fastmem address space is exactly 32 bits wide and mirroring
// is enabled; callers then pass extend32=true to the load/store emitters so
// only the low 32 bits of the address register are used.
inline bool ShouldExt32(EmitContext& ctx) {
    const bool is_32bit_space = ctx.conf.fastmem_address_space_bits == 32;
    return is_32bit_space && ctx.conf.silently_mirror_fastmem;
}
|
||||
|
||||
// May use Xscratch0 as scratch register
|
||||
// Address to read/write = [ret0 + ret1], ret0 is always Xfastmem and ret1 is either Xaddr or Xscratch0
|
||||
// Trashes NZCV
|
||||
template<size_t bitsize>
|
||||
std::pair<oaknut::XReg, oaknut::XReg> FastmemEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
|
||||
if (ctx.conf.fastmem_address_space_bits == 64 || ShouldExt32(ctx)) {
|
||||
return std::make_pair(Xfastmem, Xaddr);
|
||||
}
|
||||
|
||||
if (ctx.conf.silently_mirror_fastmem) {
|
||||
code.UBFX(Xscratch0, Xaddr, 0, ctx.conf.fastmem_address_space_bits);
|
||||
return std::make_pair(Xfastmem, Xscratch0);
|
||||
}
|
||||
|
||||
code.LSR(Xscratch0, Xaddr, ctx.conf.fastmem_address_space_bits);
|
||||
code.CBNZ(Xscratch0, *fallback);
|
||||
return std::make_pair(Xfastmem, Xaddr);
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void FastmemEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
|
||||
auto Rvalue = [&] {
|
||||
if constexpr (bitsize == 128) {
|
||||
return ctx.reg_alloc.WriteQ(inst);
|
||||
} else {
|
||||
return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
|
||||
}
|
||||
}();
|
||||
const bool ordered = IsOrdered(args[2].GetImmediateAccType());
|
||||
ctx.fpsr.Spill();
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
RegAlloc::Realize(Xaddr, Rvalue);
|
||||
|
||||
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
|
||||
|
||||
const auto [Xbase, Xoffset] = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
|
||||
const auto fastmem_location = EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered, ShouldExt32(ctx));
|
||||
|
||||
ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end, fastmem_location] {
|
||||
ctx.ebi.fastmem_patch_info.emplace(
|
||||
fastmem_location - ctx.ebi.entry_point,
|
||||
FastmemPatchInfo{
|
||||
.marker = marker,
|
||||
.fc = FakeCall{
|
||||
.call_pc = mcl::bit_cast<u64>(code.xptr<void*>()),
|
||||
},
|
||||
.recompile = ctx.conf.recompile_on_fastmem_failure,
|
||||
});
|
||||
|
||||
code.l(*fallback);
|
||||
code.MOV(Xscratch0, Xaddr);
|
||||
EmitRelocation(code, ctx, WrappedReadMemoryLinkTarget(bitsize));
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
if constexpr (bitsize == 128) {
|
||||
code.MOV(Rvalue.B16(), Q0.B16());
|
||||
} else {
|
||||
code.MOV(Rvalue.toX(), Xscratch0);
|
||||
}
|
||||
ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
|
||||
code.B(*end);
|
||||
});
|
||||
|
||||
code.l(*end);
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void FastmemEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
|
||||
auto Rvalue = [&] {
|
||||
if constexpr (bitsize == 128) {
|
||||
return ctx.reg_alloc.ReadQ(args[2]);
|
||||
} else {
|
||||
return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(args[2]);
|
||||
}
|
||||
}();
|
||||
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
|
||||
ctx.fpsr.Spill();
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
RegAlloc::Realize(Xaddr, Rvalue);
|
||||
|
||||
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
|
||||
|
||||
const auto [Xbase, Xoffset] = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
|
||||
const auto fastmem_location = EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered, ShouldExt32(ctx));
|
||||
|
||||
ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end, fastmem_location] {
|
||||
ctx.ebi.fastmem_patch_info.emplace(
|
||||
fastmem_location - ctx.ebi.entry_point,
|
||||
FastmemPatchInfo{
|
||||
.marker = marker,
|
||||
.fc = FakeCall{
|
||||
.call_pc = mcl::bit_cast<u64>(code.xptr<void*>()),
|
||||
},
|
||||
.recompile = ctx.conf.recompile_on_fastmem_failure,
|
||||
});
|
||||
|
||||
code.l(*fallback);
|
||||
if constexpr (bitsize == 128) {
|
||||
code.MOV(Xscratch0, Xaddr);
|
||||
code.MOV(Q0.B16(), Rvalue.B16());
|
||||
} else {
|
||||
code.MOV(Xscratch0, Xaddr);
|
||||
code.MOV(Xscratch1, Rvalue.toX());
|
||||
}
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
EmitRelocation(code, ctx, WrappedWriteMemoryLinkTarget(bitsize));
|
||||
if (ordered) {
|
||||
code.DMB(oaknut::BarrierOp::ISH);
|
||||
}
|
||||
ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
|
||||
code.B(*end);
|
||||
});
|
||||
|
||||
code.l(*end);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
template<size_t bitsize>
|
||||
void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
if (const auto marker = ShouldFastmem(ctx, inst)) {
|
||||
FastmemEmitReadMemory<bitsize>(code, ctx, inst, *marker);
|
||||
} else if (ctx.conf.page_table_pointer != 0) {
|
||||
InlinePageTableEmitReadMemory<bitsize>(code, ctx, inst);
|
||||
} else {
|
||||
CallbackOnlyEmitReadMemory<bitsize>(code, ctx, inst);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
CallbackOnlyEmitExclusiveReadMemory<bitsize>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
if (const auto marker = ShouldFastmem(ctx, inst)) {
|
||||
FastmemEmitWriteMemory<bitsize>(code, ctx, inst, *marker);
|
||||
} else if (ctx.conf.page_table_pointer != 0) {
|
||||
InlinePageTableEmitWriteMemory<bitsize>(code, ctx, inst);
|
||||
} else {
|
||||
CallbackOnlyEmitWriteMemory<bitsize>(code, ctx, inst);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
CallbackOnlyEmitExclusiveWriteMemory<bitsize>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template void EmitReadMemory<8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitReadMemory<16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitReadMemory<32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitReadMemory<64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitReadMemory<128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveReadMemory<8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveReadMemory<16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveReadMemory<32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveReadMemory<64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveReadMemory<128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitWriteMemory<8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitWriteMemory<16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitWriteMemory<32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitWriteMemory<64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitWriteMemory<128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveWriteMemory<8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveWriteMemory<16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveWriteMemory<32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveWriteMemory<64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
template void EmitExclusiveWriteMemory<128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
35
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.h
Normal file
35
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.h
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2022 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

// Declarations of the arm64 memory-access emitters. Only declarations live
// here; the templates are defined and explicitly instantiated in the
// corresponding .cpp file.

#pragma once

#include <cstddef>

#include "dynarmic/common/common_types.h"

// Forward declarations, so this header does not need the oaknut headers.
namespace oaknut {
struct CodeGenerator;
struct Label;
}  // namespace oaknut

namespace Dynarmic::IR {
enum class AccType;
class Inst;
}  // namespace Dynarmic::IR

namespace Dynarmic::Backend::Arm64 {

struct EmitContext;
enum class LinkTarget;

// Emits a `bitsize`-bit memory read for `inst`.
template<size_t bitsize>
void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);

// Emits a `bitsize`-bit exclusive memory read for `inst`.
template<size_t bitsize>
void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);

// Emits a `bitsize`-bit memory write for `inst`.
template<size_t bitsize>
void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);

// Emits a `bitsize`-bit exclusive memory write for `inst`.
template<size_t bitsize>
void EmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);

}  // namespace Dynarmic::Backend::Arm64
|
||||
409
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_packed.cpp
Normal file
409
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_packed.cpp
Normal file
|
|
@ -0,0 +1,409 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
template<typename EmitFn>
|
||||
static void EmitPackedOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
emit(Vresult, Va, Vb);
|
||||
}
|
||||
|
||||
template<typename EmitFn>
|
||||
static void EmitSaturatedPackedOp(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
ctx.fpsr.Spill();
|
||||
|
||||
emit(Vresult, Va, Vb);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
code.ADD(Vresult->B8(), Va->B8(), Vb->B8());
|
||||
|
||||
if (ge_inst) {
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
code.CMHI(Vge->B8(), Va->B8(), Vresult->B8());
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
code.ADD(Vresult->B8(), Va->B8(), Vb->B8());
|
||||
|
||||
if (ge_inst) {
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
code.SHADD(Vge->B8(), Va->B8(), Vb->B8());
|
||||
code.CMGE(Vge->B8(), Vge->B8(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
code.SUB(Vresult->B8(), Va->B8(), Vb->B8());
|
||||
|
||||
if (ge_inst) {
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
code.UHSUB(Vge->B8(), Va->B8(), Vb->B8());
|
||||
code.CMGE(Vge->B8(), Vge->B8(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
code.SUB(Vresult->B8(), Va->B8(), Vb->B8());
|
||||
|
||||
if (ge_inst) {
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
code.SHSUB(Vge->B8(), Va->B8(), Vb->B8());
|
||||
code.CMGE(Vge->B8(), Vge->B8(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
code.ADD(Vresult->H4(), Va->H4(), Vb->H4());
|
||||
|
||||
if (ge_inst) {
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
code.CMHI(Vge->H4(), Va->H4(), Vresult->H4());
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
code.ADD(Vresult->H4(), Va->H4(), Vb->H4());
|
||||
|
||||
if (ge_inst) {
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
code.SHADD(Vge->H4(), Va->H4(), Vb->H4());
|
||||
code.CMGE(Vge->H4(), Vge->H4(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
code.SUB(Vresult->H4(), Va->H4(), Vb->H4());
|
||||
|
||||
if (ge_inst) {
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
code.UHSUB(Vge->H4(), Va->H4(), Vb->H4());
|
||||
code.CMGE(Vge->H4(), Vge->H4(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
code.SUB(Vresult->H4(), Va->H4(), Vb->H4());
|
||||
|
||||
if (ge_inst) {
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
code.SHSUB(Vge->H4(), Va->H4(), Vb->H4());
|
||||
code.CMGE(Vge->H4(), Vge->H4(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<bool add_is_hi, bool is_signed, bool is_halving>
|
||||
static void EmitPackedAddSub(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[1]);
|
||||
RegAlloc::Realize(Vresult, Va, Vb);
|
||||
|
||||
if (is_signed) {
|
||||
code.SXTL(V0.S4(), Va->H4());
|
||||
code.SXTL(V1.S4(), Vb->H4());
|
||||
} else {
|
||||
code.UXTL(V0.S4(), Va->H4());
|
||||
code.UXTL(V1.S4(), Vb->H4());
|
||||
}
|
||||
code.EXT(V1.B8(), V1.B8(), V1.B8(), 4);
|
||||
|
||||
code.MOVI(D2, oaknut::RepImm{add_is_hi ? 0b11110000 : 0b00001111});
|
||||
|
||||
code.EOR(V1.B8(), V1.B8(), V2.B8());
|
||||
code.SUB(V1.S2(), V1.S2(), V2.S2());
|
||||
code.SUB(Vresult->S2(), V0.S2(), V1.S2());
|
||||
|
||||
if (is_halving) {
|
||||
if (is_signed) {
|
||||
code.SSHR(Vresult->S2(), Vresult->S2(), 1);
|
||||
} else {
|
||||
code.USHR(Vresult->S2(), Vresult->S2(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (ge_inst) {
|
||||
ASSERT(!is_halving);
|
||||
|
||||
auto Vge = ctx.reg_alloc.WriteD(ge_inst);
|
||||
RegAlloc::Realize(Vge);
|
||||
|
||||
if (is_signed) {
|
||||
code.CMGE(Vge->S2(), Vresult->S2(), 0);
|
||||
code.XTN(Vge->H4(), Vge->toQ().S4());
|
||||
} else {
|
||||
code.CMEQ(Vge->H4(), Vresult->H4(), 0);
|
||||
code.EOR(Vge->B8(), Vge->B8(), V2.B8());
|
||||
code.SHRN(Vge->H4(), Vge->toQ().S4(), 16);
|
||||
}
|
||||
}
|
||||
|
||||
code.XTN(Vresult->H4(), Vresult->toQ().S4());
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedAddSub<true, false, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedAddSub<true, true, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedAddSub<false, false, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedAddSub<false, true, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.UHADD(Vresult->B8(), Va->B8(), Vb->B8()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.SHADD(Vresult->B8(), Va->B8(), Vb->B8()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.UHSUB(Vresult->B8(), Va->B8(), Vb->B8()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.SHSUB(Vresult->B8(), Va->B8(), Vb->B8()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.UHADD(Vresult->H4(), Va->H4(), Vb->H4()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.SHADD(Vresult->H4(), Va->H4(), Vb->H4()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.UHSUB(Vresult->H4(), Va->H4(), Vb->H4()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.SHSUB(Vresult->H4(), Va->H4(), Vb->H4()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedAddSub<true, false, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedAddSub<true, true, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedAddSub<false, false, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedAddSub<false, true, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedAddU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSaturatedPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.UQADD(Vresult->B8(), Va->B8(), Vb->B8()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedAddS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSaturatedPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.SQADD(Vresult->B8(), Va->B8(), Vb->B8()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedSubU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSaturatedPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.UQSUB(Vresult->B8(), Va->B8(), Vb->B8()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedSubS8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSaturatedPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.SQSUB(Vresult->B8(), Va->B8(), Vb->B8()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedAddU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSaturatedPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.UQADD(Vresult->H4(), Va->H4(), Vb->H4()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedAddS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSaturatedPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.SQADD(Vresult->H4(), Va->H4(), Vb->H4()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedSubU16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSaturatedPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.UQSUB(Vresult->H4(), Va->H4(), Vb->H4()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedSubS16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSaturatedPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) { code.SQSUB(Vresult->H4(), Va->H4(), Vb->H4()); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAbsDiffSumU8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOp(code, ctx, inst, [&](auto& Vresult, auto& Va, auto& Vb) {
|
||||
code.MOVI(D2, oaknut::RepImm{0b00001111});
|
||||
code.UABD(Vresult->B8(), Va->B8(), Vb->B8());
|
||||
code.AND(Vresult->B8(), Vresult->B8(), V2.B8()); // TODO: Zext tracking
|
||||
code.UADDLV(Vresult->toH(), Vresult->B8());
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSelect>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Vresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Vge = ctx.reg_alloc.ReadD(args[0]);
|
||||
auto Va = ctx.reg_alloc.ReadD(args[1]);
|
||||
auto Vb = ctx.reg_alloc.ReadD(args[2]);
|
||||
RegAlloc::Realize(Vresult, Vge, Va, Vb);
|
||||
|
||||
code.FMOV(Vresult, Vge); // TODO: Move elimination
|
||||
code.BSL(Vresult->B8(), Vb->B8(), Va->B8());
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,273 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAddWithFlag32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
ASSERT(overflow_inst);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
auto Wa = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto Wb = ctx.reg_alloc.ReadW(args[1]);
|
||||
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
|
||||
RegAlloc::Realize(Wresult, Wa, Wb, Woverflow);
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
|
||||
code.ADDS(Wresult, *Wa, Wb);
|
||||
code.ASR(Wscratch0, Wresult, 31);
|
||||
code.EOR(Wscratch0, Wscratch0, 0x8000'0000);
|
||||
code.CSEL(Wresult, Wresult, Wscratch0, VC);
|
||||
code.CSET(Woverflow, VS);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSubWithFlag32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
ASSERT(overflow_inst);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
auto Wa = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto Wb = ctx.reg_alloc.ReadW(args[1]);
|
||||
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
|
||||
RegAlloc::Realize(Wresult, Wa, Wb, Woverflow);
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
|
||||
code.SUBS(Wresult, *Wa, Wb);
|
||||
code.ASR(Wscratch0, Wresult, 31);
|
||||
code.EOR(Wscratch0, Wscratch0, 0x8000'0000);
|
||||
code.CSEL(Wresult, Wresult, Wscratch0, VC);
|
||||
code.CSET(Woverflow, VS);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const size_t N = args[1].GetImmediateU8();
|
||||
ASSERT(N >= 1 && N <= 32);
|
||||
|
||||
if (N == 32) {
|
||||
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
|
||||
if (overflow_inst) {
|
||||
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
|
||||
RegAlloc::Realize(Woverflow);
|
||||
code.MOV(*Woverflow, WZR);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
|
||||
const u32 negative_saturated_value = ~u32{0} << (N - 1);
|
||||
|
||||
auto Woperand = ctx.reg_alloc.ReadW(args[0]);
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
RegAlloc::Realize(Woperand, Wresult);
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
|
||||
code.MOV(Wscratch0, negative_saturated_value);
|
||||
code.MOV(Wscratch1, positive_saturated_value);
|
||||
code.CMP(*Woperand, Wscratch0);
|
||||
code.CSEL(Wresult, Woperand, Wscratch0, GT);
|
||||
code.CMP(*Woperand, Wscratch1);
|
||||
code.CSEL(Wresult, Wresult, Wscratch1, LT);
|
||||
|
||||
if (overflow_inst) {
|
||||
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
|
||||
RegAlloc::Realize(Woverflow);
|
||||
code.CMP(*Wresult, Woperand);
|
||||
code.CSET(Woverflow, NE);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturation>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Wresult = ctx.reg_alloc.WriteW(inst);
|
||||
auto Woperand = ctx.reg_alloc.ReadW(args[0]);
|
||||
RegAlloc::Realize(Wresult, Woperand);
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
|
||||
const size_t N = args[1].GetImmediateU8();
|
||||
ASSERT(N <= 31);
|
||||
const u32 saturated_value = (1u << N) - 1;
|
||||
|
||||
code.MOV(Wscratch0, saturated_value);
|
||||
code.CMP(*Woperand, 0);
|
||||
code.CSEL(Wresult, Woperand, WZR, GT);
|
||||
code.CMP(*Woperand, Wscratch0);
|
||||
code.CSEL(Wresult, Wresult, Wscratch0, LT);
|
||||
|
||||
if (overflow_inst) {
|
||||
auto Woverflow = ctx.reg_alloc.WriteW(overflow_inst);
|
||||
RegAlloc::Realize(Woverflow);
|
||||
code.CSET(Woverflow, HI);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSub8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSub16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedSub8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedSub16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
1889
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector.cpp
Normal file
1889
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,791 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include <mcl/mp/metavalue/lift_value.hpp>
|
||||
#include <mcl/mp/typelist/cartesian_product.hpp>
|
||||
#include <mcl/mp/typelist/get.hpp>
|
||||
#include <mcl/mp/typelist/lift_sequence.hpp>
|
||||
#include <mcl/mp/typelist/list.hpp>
|
||||
#include <mcl/mp/typelist/lower_to_tuple.hpp>
|
||||
#include <mcl/type_traits/function_info.hpp>
|
||||
#include <mcl/type_traits/integer_of_size.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/a64_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/common/always_false.h"
|
||||
#include "dynarmic/common/cast_util.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/common/fp/info.h"
|
||||
#include "dynarmic/common/fp/op.h"
|
||||
#include "dynarmic/common/fp/rounding_mode.h"
|
||||
#include "dynarmic/common/lut_from_list.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
namespace mp = mcl::mp;
|
||||
|
||||
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
|
||||
|
||||
// Array alias that always sizes itself according to the given type T
|
||||
// relative to the size of a vector register. e.g. T = u32 would result
|
||||
// in a std::array<u32, 4>.
|
||||
template<typename T>
|
||||
using VectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T>>;
|
||||
|
||||
template<typename EmitFn>
|
||||
static void MaybeStandardFPSCRValue(oaknut::CodeGenerator& code, EmitContext& ctx, bool fpcr_controlled, EmitFn emit) {
|
||||
if (ctx.FPCR(fpcr_controlled) != ctx.FPCR()) {
|
||||
code.MOV(Wscratch0, ctx.FPCR(fpcr_controlled).Value());
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
emit();
|
||||
code.MOV(Wscratch0, ctx.FPCR().Value());
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
} else {
|
||||
emit();
|
||||
}
|
||||
}
|
||||
|
||||
template<typename EmitFn>
|
||||
static void EmitTwoOp(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qa = ctx.reg_alloc.ReadQ(args[0]);
|
||||
const bool fpcr_controlled = args[1].IsVoid() || args[1].GetImmediateU1();
|
||||
RegAlloc::Realize(Qresult, Qa);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { emit(Qresult, Qa); });
|
||||
}
|
||||
|
||||
template<size_t size, typename EmitFn>
|
||||
static void EmitTwoOpArranged(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
EmitTwoOp(code, ctx, inst, [&](auto& Qresult, auto& Qa) {
|
||||
if constexpr (size == 16) {
|
||||
emit(Qresult->H8(), Qa->H8());
|
||||
} else if constexpr (size == 32) {
|
||||
emit(Qresult->S4(), Qa->S4());
|
||||
} else if constexpr (size == 64) {
|
||||
emit(Qresult->D2(), Qa->D2());
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<typename EmitFn>
|
||||
static void EmitThreeOp(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qa = ctx.reg_alloc.ReadQ(args[0]);
|
||||
auto Qb = ctx.reg_alloc.ReadQ(args[1]);
|
||||
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||
RegAlloc::Realize(Qresult, Qa, Qb);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { emit(Qresult, Qa, Qb); });
|
||||
}
|
||||
|
||||
template<size_t size, typename EmitFn>
|
||||
static void EmitThreeOpArranged(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
EmitThreeOp(code, ctx, inst, [&](auto& Qresult, auto& Qa, auto& Qb) {
|
||||
if constexpr (size == 16) {
|
||||
emit(Qresult->H8(), Qa->H8(), Qb->H8());
|
||||
} else if constexpr (size == 32) {
|
||||
emit(Qresult->S4(), Qa->S4(), Qb->S4());
|
||||
} else if constexpr (size == 64) {
|
||||
emit(Qresult->D2(), Qa->D2(), Qb->D2());
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<size_t size, typename EmitFn>
|
||||
static void EmitFMA(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qresult = ctx.reg_alloc.ReadWriteQ(args[0], inst);
|
||||
auto Qm = ctx.reg_alloc.ReadQ(args[1]);
|
||||
auto Qn = ctx.reg_alloc.ReadQ(args[2]);
|
||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||
RegAlloc::Realize(Qresult, Qm, Qn);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
if constexpr (size == 16) {
|
||||
emit(Qresult->H8(), Qm->H8(), Qn->H8());
|
||||
} else if constexpr (size == 32) {
|
||||
emit(Qresult->S4(), Qm->S4(), Qn->S4());
|
||||
} else if constexpr (size == 64) {
|
||||
emit(Qresult->D2(), Qm->D2(), Qn->D2());
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<size_t size, typename EmitFn>
|
||||
static void EmitFromFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qto = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qfrom = ctx.reg_alloc.ReadQ(args[0]);
|
||||
const u8 fbits = args[1].GetImmediateU8();
|
||||
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
|
||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||
ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
|
||||
RegAlloc::Realize(Qto, Qfrom);
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
if constexpr (size == 32) {
|
||||
emit(Qto->S4(), Qfrom->S4(), fbits);
|
||||
} else if constexpr (size == 64) {
|
||||
emit(Qto->D2(), Qfrom->D2(), fbits);
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<size_t fsize, bool is_signed>
|
||||
void EmitToFixed(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qto = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qfrom = ctx.reg_alloc.ReadQ(args[0]);
|
||||
const size_t fbits = args[1].GetImmediateU8();
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
|
||||
const bool fpcr_controlled = inst->GetArg(3).GetU1();
|
||||
RegAlloc::Realize(Qto, Qfrom);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
auto Vto = [&] {
|
||||
if constexpr (fsize == 32) {
|
||||
return Qto->S4();
|
||||
} else if constexpr (fsize == 64) {
|
||||
return Qto->D2();
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<fsize>>);
|
||||
}
|
||||
}();
|
||||
auto Vfrom = [&] {
|
||||
if constexpr (fsize == 32) {
|
||||
return Qfrom->S4();
|
||||
} else if constexpr (fsize == 64) {
|
||||
return Qfrom->D2();
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<fsize>>);
|
||||
}
|
||||
}();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
if (rounding_mode == FP::RoundingMode::TowardsZero) {
|
||||
if constexpr (is_signed) {
|
||||
if (fbits) {
|
||||
code.FCVTZS(Vto, Vfrom, fbits);
|
||||
} else {
|
||||
code.FCVTZS(Vto, Vfrom);
|
||||
}
|
||||
} else {
|
||||
if (fbits) {
|
||||
code.FCVTZU(Vto, Vfrom, fbits);
|
||||
} else {
|
||||
code.FCVTZU(Vto, Vfrom);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ASSERT(fbits == 0);
|
||||
if constexpr (is_signed) {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
code.FCVTNS(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
code.FCVTPS(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
code.FCVTMS(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
code.FCVTZS(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
code.FCVTAS(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::ToOdd:
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RoundingMode");
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
code.FCVTNU(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
code.FCVTPU(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
code.FCVTMU(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
code.FCVTZU(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
code.FCVTAU(Vto, Vfrom);
|
||||
break;
|
||||
case FP::RoundingMode::ToOdd:
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RoundingMode");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Lambda>
|
||||
static void EmitTwoOpFallbackWithoutRegAlloc(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::QReg Qresult, oaknut::QReg Qarg1, Lambda lambda, bool fpcr_controlled) {
|
||||
const auto fn = static_cast<mcl::equivalent_function_type<Lambda>*>(lambda);
|
||||
|
||||
const u32 fpcr = ctx.FPCR(fpcr_controlled).Value();
|
||||
constexpr u64 stack_size = sizeof(u64) * 4; // sizeof(u128) * 2
|
||||
|
||||
ABI_PushRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size);
|
||||
|
||||
code.MOV(Xscratch0, mcl::bit_cast<u64>(fn));
|
||||
code.ADD(X0, SP, 0 * 16);
|
||||
code.ADD(X1, SP, 1 * 16);
|
||||
code.MOV(X2, fpcr);
|
||||
code.ADD(X3, Xstate, ctx.conf.state_fpsr_offset);
|
||||
code.STR(Qarg1, X1);
|
||||
code.BLR(Xscratch0);
|
||||
code.LDR(Qresult, SP);
|
||||
|
||||
ABI_PopRegisters(code, ABI_CALLER_SAVE & ~(1ull << Qresult.index()), stack_size);
|
||||
}
|
||||
|
||||
template<size_t fpcr_controlled_arg_index = 1, typename Lambda>
|
||||
static void EmitTwoOpFallback(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qarg1 = ctx.reg_alloc.ReadQ(args[0]);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
RegAlloc::Realize(Qarg1, Qresult);
|
||||
ctx.reg_alloc.SpillFlags();
|
||||
ctx.fpsr.Spill();
|
||||
|
||||
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
|
||||
EmitTwoOpFallbackWithoutRegAlloc(code, ctx, Qresult, Qarg1, lambda, fpcr_controlled);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAbs16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qresult = ctx.reg_alloc.ReadWriteQ(args[0], inst);
|
||||
RegAlloc::Realize(Qresult);
|
||||
|
||||
code.BIC(Qresult->H8(), 0b10000000, LSL, 8);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAbs32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va) { code.FABS(Vresult, Va); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAbs64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va) { code.FABS(Vresult, Va); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorDiv32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FDIV(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorDiv64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FDIV(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorEqual16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorEqual32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FCMEQ(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorEqual64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FCMEQ(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromHalf32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
|
||||
ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
|
||||
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Doperand = ctx.reg_alloc.ReadD(args[0]);
|
||||
RegAlloc::Realize(Qresult, Doperand);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
code.FCVTL(Qresult->S4(), Doperand->H4());
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromSignedFixed32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<32>(code, ctx, inst, [&](auto Vto, auto Vfrom, u8 fbits) { fbits ? code.SCVTF(Vto, Vfrom, fbits) : code.SCVTF(Vto, Vfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromSignedFixed64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<64>(code, ctx, inst, [&](auto Vto, auto Vfrom, u8 fbits) { fbits ? code.SCVTF(Vto, Vfrom, fbits) : code.SCVTF(Vto, Vfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromUnsignedFixed32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<32>(code, ctx, inst, [&](auto Vto, auto Vfrom, u8 fbits) { fbits ? code.UCVTF(Vto, Vfrom, fbits) : code.UCVTF(Vto, Vfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromUnsignedFixed64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFromFixed<64>(code, ctx, inst, [&](auto Vto, auto Vfrom, u8 fbits) { fbits ? code.UCVTF(Vto, Vfrom, fbits) : code.UCVTF(Vto, Vfrom); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorGreater32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FCMGT(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorGreater64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FCMGT(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorGreaterEqual32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FCMGE(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorGreaterEqual64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FCMGE(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMax32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMAX(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMax64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMAX(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMaxNumeric32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMAXNM(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMaxNumeric64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMAXNM(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMin32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMIN(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMin64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMIN(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMinNumeric32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMINNM(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMinNumeric64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMINNM(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMul32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMUL(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMul64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMUL(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFMA<32>(code, ctx, inst, [&](auto Va, auto Vn, auto Vm) { code.FMLA(Va, Vn, Vm); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFMA<64>(code, ctx, inst, [&](auto Va, auto Vn, auto Vm) { code.FMLA(Va, Vn, Vm); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulX32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMULX(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulX64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FMULX(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorNeg16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorNeg32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va) { code.FNEG(Vresult, Va); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorNeg64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va) { code.FNEG(Vresult, Va); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorPairedAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FADDP(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorPairedAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FADDP(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorPairedAddLower32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp(code, ctx, inst, [&](auto& Qresult, auto& Qa, auto& Qb) {
|
||||
code.ZIP1(V0.D2(), Qa->D2(), Qb->D2());
|
||||
code.MOVI(D1, oaknut::RepImm{0});
|
||||
code.FADDP(Qresult->S4(), V0.S4(), V1.S4());
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorPairedAddLower64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOp(code, ctx, inst, [&](auto& Qresult, auto& Qa, auto& Qb) {
|
||||
code.ZIP1(V0.D2(), Qa->D2(), Qb->D2());
|
||||
code.FADDP(Qresult->toD(), V0.D2());
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipEstimate16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipEstimate32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.FRECPE(Vresult, Voperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipEstimate64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.FRECPE(Vresult, Voperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipStepFused16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipStepFused32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FRECPS(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipStepFused64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FRECPS(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRoundInt16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
|
||||
using rounding_list = mp::list<
|
||||
mp::lift_value<FP::RoundingMode::ToNearest_TieEven>,
|
||||
mp::lift_value<FP::RoundingMode::TowardsPlusInfinity>,
|
||||
mp::lift_value<FP::RoundingMode::TowardsMinusInfinity>,
|
||||
mp::lift_value<FP::RoundingMode::TowardsZero>,
|
||||
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
|
||||
using exact_list = mp::list<std::true_type, std::false_type>;
|
||||
|
||||
static const auto lut = Common::GenerateLookupTableFromList(
|
||||
[]<typename I>(I) {
|
||||
using FPT = u16;
|
||||
return std::pair{
|
||||
mp::lower_to_tuple_v<I>,
|
||||
Common::FptrCast(
|
||||
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value;
|
||||
constexpr bool exact = mp::get<1, I>::value;
|
||||
|
||||
for (size_t i = 0; i < output.size(); ++i) {
|
||||
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
|
||||
}
|
||||
})};
|
||||
},
|
||||
mp::cartesian_product<rounding_list, exact_list>{});
|
||||
|
||||
EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRoundInt32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
const bool fpcr_controlled = inst->GetArg(3).GetU1();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qoperand = ctx.reg_alloc.ReadQ(args[0]);
|
||||
RegAlloc::Realize(Qresult, Qoperand);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
if (exact) {
|
||||
ASSERT(ctx.FPCR(fpcr_controlled).RMode() == rounding_mode);
|
||||
code.FRINTX(Qresult->S4(), Qoperand->S4());
|
||||
} else {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
code.FRINTN(Qresult->S4(), Qoperand->S4());
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
code.FRINTP(Qresult->S4(), Qoperand->S4());
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
code.FRINTM(Qresult->S4(), Qoperand->S4());
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
code.FRINTZ(Qresult->S4(), Qoperand->S4());
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
code.FRINTA(Qresult->S4(), Qoperand->S4());
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RoundingMode");
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRoundInt64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
const bool fpcr_controlled = inst->GetArg(3).GetU1();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qoperand = ctx.reg_alloc.ReadQ(args[0]);
|
||||
RegAlloc::Realize(Qresult, Qoperand);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
if (exact) {
|
||||
ASSERT(ctx.FPCR(fpcr_controlled).RMode() == rounding_mode);
|
||||
code.FRINTX(Qresult->D2(), Qoperand->D2());
|
||||
} else {
|
||||
switch (rounding_mode) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
code.FRINTN(Qresult->D2(), Qoperand->D2());
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
code.FRINTP(Qresult->D2(), Qoperand->D2());
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
code.FRINTM(Qresult->D2(), Qoperand->D2());
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
code.FRINTZ(Qresult->D2(), Qoperand->D2());
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
code.FRINTA(Qresult->D2(), Qoperand->D2());
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RoundingMode");
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.FRSQRTE(Vresult, Voperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Voperand) { code.FRSQRTE(Vresult, Voperand); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FRSQRTS(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FRSQRTS(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorSqrt32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va) { code.FSQRT(Vresult, Va); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorSqrt64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitTwoOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va) { code.FSQRT(Vresult, Va); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitThreeOpArranged<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.FSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToHalf32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const auto rounding_mode = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
|
||||
ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
|
||||
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||
|
||||
auto Dresult = ctx.reg_alloc.WriteD(inst);
|
||||
auto Qoperand = ctx.reg_alloc.ReadQ(args[0]);
|
||||
RegAlloc::Realize(Dresult, Qoperand);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
code.FCVTN(Dresult->H4(), Qoperand->S4());
|
||||
});
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToSignedFixed16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToSignedFixed32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<32, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToSignedFixed64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<64, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
(void)code;
|
||||
(void)ctx;
|
||||
(void)inst;
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<32, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitToFixed<64, false>(code, ctx, inst);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/mp/metavalue/lift_value.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/common/always_false.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
template<size_t size, typename EmitFn>
|
||||
static void Emit(oaknut::CodeGenerator&, EmitContext& ctx, IR::Inst* inst, EmitFn emit) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto Qresult = ctx.reg_alloc.WriteQ(inst);
|
||||
auto Qa = ctx.reg_alloc.ReadQ(args[0]);
|
||||
auto Qb = ctx.reg_alloc.ReadQ(args[1]);
|
||||
RegAlloc::Realize(Qresult, Qa, Qb);
|
||||
ctx.fpsr.Load();
|
||||
|
||||
if constexpr (size == 8) {
|
||||
emit(Qresult->B16(), Qa->B16(), Qb->B16());
|
||||
} else if constexpr (size == 16) {
|
||||
emit(Qresult->H8(), Qa->H8(), Qb->H8());
|
||||
} else if constexpr (size == 32) {
|
||||
emit(Qresult->S4(), Qa->S4(), Qb->S4());
|
||||
} else if constexpr (size == 64) {
|
||||
emit(Qresult->D2(), Qa->D2(), Qb->D2());
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<size>>);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<8>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SQADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<16>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SQADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SQADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SQADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedSub8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<8>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SQSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedSub16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<16>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SQSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SQSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.SQSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<8>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UQADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<16>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UQADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UQADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UQADD(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub8>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<8>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UQSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub16>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<16>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UQSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UQSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
Emit<64>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UQSUB(Vresult, Va, Vb); });
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
51
src/dynarmic/src/dynarmic/backend/arm64/emit_context.h
Normal file
51
src/dynarmic/src/dynarmic/backend/arm64/emit_context.h
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/emit_arm64.h"
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
} // namespace Dynarmic::IR
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct EmitConfig;
|
||||
class FastmemManager;
|
||||
class FpsrManager;
|
||||
|
||||
using SharedLabel = std::shared_ptr<oaknut::Label>;
|
||||
|
||||
// Creates a fresh, default-constructed oaknut::Label owned by a shared_ptr.
inline SharedLabel GenSharedLabel() {
    SharedLabel label = std::make_shared<oaknut::Label>();
    return label;
}
|
||||
|
||||
struct EmitContext {
|
||||
IR::Block& block;
|
||||
RegAlloc& reg_alloc;
|
||||
const EmitConfig& conf;
|
||||
EmittedBlockInfo& ebi;
|
||||
FpsrManager& fpsr;
|
||||
FastmemManager& fastmem;
|
||||
|
||||
std::vector<std::function<void()>> deferred_emits;
|
||||
|
||||
FP::FPCR FPCR(bool fpcr_controlled = true) const {
|
||||
const FP::FPCR fpcr = conf.descriptor_to_fpcr(block.Location());
|
||||
return fpcr_controlled ? fpcr : fpcr.ASIMDStandardValue();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
|
||||
// Creates one reservation slot per processor: every exclusive address starts out as
// INVALID_EXCLUSIVE_ADDRESS and exclusive_values holds `processor_count` value-initialized entries.
ExclusiveMonitor::ExclusiveMonitor(size_t processor_count)
        : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS)
        , exclusive_values(processor_count) {
}
|
||||
|
||||
size_t ExclusiveMonitor::GetProcessorCount() const {
|
||||
return exclusive_addresses.size();
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Lock() {
|
||||
lock.Lock();
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Unlock() {
|
||||
lock.Unlock();
|
||||
}
|
||||
|
||||
bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
|
||||
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
|
||||
|
||||
Lock();
|
||||
if (exclusive_addresses[processor_id] != masked_address) {
|
||||
Unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
for (VAddr& other_address : exclusive_addresses) {
|
||||
if (other_address == masked_address) {
|
||||
other_address = INVALID_EXCLUSIVE_ADDRESS;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Clear() {
|
||||
Lock();
|
||||
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
|
||||
Unlock();
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
|
||||
Lock();
|
||||
exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
|
||||
Unlock();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic
|
||||
59
src/dynarmic/src/dynarmic/backend/arm64/fastmem.h
Normal file
59
src/dynarmic/src/dynarmic/backend/arm64/fastmem.h
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <tuple>
|
||||
|
||||
#include <mcl/hash/xmrx.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/backend/exception_handler.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, unsigned>;
|
||||
|
||||
struct DoNotFastmemMarkerHash {
|
||||
size_t operator()(const DoNotFastmemMarker& value) const {
|
||||
return mcl::hash::xmrx(std::get<0>(value).Value() ^ static_cast<u64>(std::get<1>(value)));
|
||||
}
|
||||
};
|
||||
|
||||
struct FastmemPatchInfo {
|
||||
DoNotFastmemMarker marker;
|
||||
FakeCall fc;
|
||||
bool recompile;
|
||||
};
|
||||
|
||||
class FastmemManager {
|
||||
public:
|
||||
explicit FastmemManager(ExceptionHandler& eh)
|
||||
: exception_handler(eh) {}
|
||||
|
||||
bool SupportsFastmem() const {
|
||||
return exception_handler.SupportsFastmem();
|
||||
}
|
||||
|
||||
bool ShouldFastmem(DoNotFastmemMarker marker) const {
|
||||
return do_not_fastmem.count(marker) == 0;
|
||||
}
|
||||
|
||||
void MarkDoNotFastmem(DoNotFastmemMarker marker) {
|
||||
do_not_fastmem.insert(marker);
|
||||
}
|
||||
|
||||
private:
|
||||
ExceptionHandler& exception_handler;
|
||||
ankerl::unordered_dense::set<DoNotFastmemMarker, DoNotFastmemMarkerHash> do_not_fastmem;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
49
src/dynarmic/src/dynarmic/backend/arm64/fpsr_manager.cpp
Normal file
49
src/dynarmic/src/dynarmic/backend/arm64/fpsr_manager.cpp
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Stores the code generator reference and the offset (relative to Xstate) of the saved FPSR
// word. No code is emitted here.
FpsrManager::FpsrManager(oaknut::CodeGenerator& code, size_t state_fpsr_offset)
        : code{code}
        , state_fpsr_offset{state_fpsr_offset} {
}
|
||||
|
||||
// If the FPSR is marked as loaded, emits code that ORs the hardware FPSR into the 32-bit word
// at [Xstate + state_fpsr_offset], then marks the FPSR as no longer loaded. Does nothing otherwise.
// The emitted sequence uses Wscratch0 and Xscratch1/Wscratch1 as temporaries.
void FpsrManager::Spill() {
    if (fpsr_loaded) {
        code.LDR(Wscratch0, Xstate, state_fpsr_offset);
        code.MRS(Xscratch1, oaknut::SystemReg::FPSR);
        code.ORR(Wscratch0, Wscratch0, Wscratch1);
        code.STR(Wscratch0, Xstate, state_fpsr_offset);

        fpsr_loaded = false;
    }
}
|
||||
|
||||
// If the FPSR is not already marked as loaded, emits code that writes zero (XZR) to the
// hardware FPSR and marks it as loaded. Does nothing otherwise. Spill() later ORs the hardware
// value into the saved state.
void FpsrManager::Load() {
    if (!fpsr_loaded) {
        code.MSR(oaknut::SystemReg::FPSR, XZR);
        fpsr_loaded = true;
    }
}
|
||||
|
||||
// Emits code that loads the saved FPSR word from [Xstate + state_fpsr_offset] into `dest`.
// When the FPSR is marked as loaded, the hardware FPSR is additionally read (through
// Xscratch1) and ORed into `dest`. The saved state and the loaded flag are left unchanged.
void FpsrManager::GetFpsr(oaknut::WReg dest) {
    code.LDR(dest, Xstate, state_fpsr_offset);

    if (!fpsr_loaded) {
        return;
    }

    code.MRS(Xscratch1, oaknut::SystemReg::FPSR);
    code.ORR(dest, dest, Wscratch1);
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
36
src/dynarmic/src/dynarmic/backend/arm64/fpsr_manager.h
Normal file
36
src/dynarmic/src/dynarmic/backend/arm64/fpsr_manager.h
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
namespace oaknut {
|
||||
struct CodeGenerator;
|
||||
struct WReg;
|
||||
} // namespace oaknut
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class FpsrManager {
|
||||
public:
|
||||
explicit FpsrManager(oaknut::CodeGenerator& code, size_t state_fpsr_offset);
|
||||
|
||||
void Spill();
|
||||
void Load();
|
||||
void Overwrite() { fpsr_loaded = false; }
|
||||
|
||||
void GetFpsr(oaknut::WReg);
|
||||
|
||||
private:
|
||||
oaknut::CodeGenerator& code;
|
||||
size_t state_fpsr_offset;
|
||||
bool fpsr_loaded = false;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
616
src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp
Normal file
616
src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp
Normal file
|
|
@ -0,0 +1,616 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/reg_alloc.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <iterator>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/bit/bit_field.hpp>
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include <mcl/mp/metavalue/lift_value.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/backend/arm64/abi.h"
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/backend/arm64/fpsr_manager.h"
|
||||
#include "dynarmic/backend/arm64/verbose_debugging_output.h"
|
||||
#include "dynarmic/common/always_false.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
constexpr size_t spill_offset = offsetof(StackLayout, spill);
|
||||
constexpr size_t spill_slot_size = sizeof(decltype(StackLayout::spill)::value_type);
|
||||
|
||||
// Returns true for IR types that carry no value to track in a host location.
// Only IR::Type::Table is treated this way.
static bool IsValuelessType(IR::Type type) {
    return type == IR::Type::Table;
}
|
||||
|
||||
// Returns the IR type of the wrapped value.
IR::Type Argument::GetType() const {
    const IR::Type type = value.GetType();
    return type;
}
|
||||
|
||||
bool Argument::IsImmediate() const {
|
||||
return value.IsImmediate();
|
||||
}
|
||||
|
||||
bool Argument::GetImmediateU1() const {
|
||||
return value.GetU1();
|
||||
}
|
||||
|
||||
// Returns the immediate narrowed to 8 bits; asserts that it fits.
u8 Argument::GetImmediateU8() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x100);
    return static_cast<u8>(imm);
}
|
||||
|
||||
// Returns the immediate narrowed to 16 bits; asserts that it fits.
u16 Argument::GetImmediateU16() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x10000);
    return static_cast<u16>(imm);
}
|
||||
|
||||
// Returns the immediate narrowed to 32 bits; asserts that it fits.
u32 Argument::GetImmediateU32() const {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x100000000);
    return static_cast<u32>(imm);
}
|
||||
|
||||
// Returns the immediate as a full 64-bit value (no range check needed).
u64 Argument::GetImmediateU64() const {
    const u64 imm = value.GetImmediateAsU64();
    return imm;
}
|
||||
|
||||
// Returns the wrapped condition code; asserts that the value is an immediate
// of type IR::Type::Cond.
IR::Cond Argument::GetImmediateCond() const {
    ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
    const IR::Cond cond = value.GetCond();
    return cond;
}
|
||||
|
||||
// Returns the wrapped access type; asserts that the value is an immediate of
// type IR::Type::AccType.
IR::AccType Argument::GetImmediateAccType() const {
    ASSERT(IsImmediate() && GetType() == IR::Type::AccType);
    const IR::AccType acc_type = value.GetAccType();
    return acc_type;
}
|
||||
|
||||
// Returns the kind of host location currently holding this argument's value.
// Only meaningful for non-immediate arguments. ValueLocation() returns an
// empty optional when no host location holds the instruction, so the result
// is asserted before it is dereferenced, as the other lookups in this file do.
HostLoc::Kind Argument::CurrentLocationKind() const {
    const auto current_location = reg_alloc.ValueLocation(value.GetInst());
    ASSERT(current_location);
    return current_location->kind;
}
|
||||
|
||||
// Returns true when `value` is one of the instructions held in this location.
bool HostLocInfo::Contains(const IR::Inst* value) const {
    return std::any_of(values.begin(), values.end(), [value](const IR::Inst* held) {
        return held == value;
    });
}
|
||||
|
||||
// Claims a completely empty location as scratch: no value is recorded, the
// location is only marked realized. Asserts that the location was empty.
void HostLocInfo::SetupScratchLocation() {
    ASSERT(IsCompletelyEmpty());

    realized = true;
}
|
||||
|
||||
// Makes this (asserted-empty) location the home of `value`: the location is
// marked realized, use counters are reset, and the expected number of uses is
// taken from the instruction's use count.
void HostLocInfo::SetupLocation(const IR::Inst* value) {
    ASSERT(IsCompletelyEmpty());

    values.assign(1, value);
    realized = true;

    uses_this_inst = 0;
    accumulated_uses = 0;
    expected_uses = value->UseCount();
}
|
||||
|
||||
bool HostLocInfo::IsCompletelyEmpty() const {
|
||||
return values.empty() && !locked && !realized && !accumulated_uses && !expected_uses && !uses_this_inst;
|
||||
}
|
||||
|
||||
bool HostLocInfo::MaybeAllocatable() const {
|
||||
return !locked && !realized;
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsOneRemainingUse() const {
|
||||
return accumulated_uses + 1 == expected_uses && uses_this_inst == 1;
|
||||
}
|
||||
|
||||
void HostLocInfo::UpdateUses() {
|
||||
accumulated_uses += uses_this_inst;
|
||||
uses_this_inst = 0;
|
||||
|
||||
if (accumulated_uses == expected_uses) {
|
||||
values.clear();
|
||||
accumulated_uses = 0;
|
||||
expected_uses = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the Argument array for `inst`. Each argument slot receives the
// corresponding IR value; for every non-immediate argument that carries a
// value, the host location holding it has its per-instruction use counter
// incremented (and is asserted to exist).
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
    ArgumentInfo arguments = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};

    const size_t arg_count = inst->NumArgs();
    for (size_t i = 0; i < arg_count; ++i) {
        const IR::Value arg = inst->GetArg(i);
        arguments[i].value = arg;

        if (arg.IsImmediate() || IsValuelessType(arg.GetType())) {
            continue;
        }

        ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
        ValueInfo(arg.GetInst()).uses_this_inst++;
    }

    return arguments;
}
|
||||
|
||||
// Returns true when `inst` has been recorded in defined_insts.
bool RegAlloc::WasValueDefined(IR::Inst* inst) const {
    return defined_insts.find(inst) != defined_insts.end();
}
|
||||
|
||||
// Prepares host state for a call to an external function:
//  1. spills the FPSR and the flags,
//  2. spills every GPR and FPR whose bit is set in ABI_CALLER_SAVE (low 32
//     bits select GPRs, high 32 bits select FPRs),
//  3. copies up to four arguments into consecutive argument registers,
//     U128 arguments into Q registers and everything else into X registers.
// An absent argument leaves a gap in the general-purpose sequence.
void RegAlloc::PrepareForCall(std::optional<Argument::copyable_reference> arg0, std::optional<Argument::copyable_reference> arg1, std::optional<Argument::copyable_reference> arg2, std::optional<Argument::copyable_reference> arg3) {
    fpsr_manager.Spill();
    SpillFlags();

    // TODO: Spill into callee-save registers

    for (int gpr = 0; gpr < 32; ++gpr) {
        if (!mcl::bit::get_bit(gpr, static_cast<u32>(ABI_CALLER_SAVE))) {
            continue;
        }
        SpillGpr(gpr);
    }

    for (int fpr = 0; fpr < 32; ++fpr) {
        if (!mcl::bit::get_bit(fpr, static_cast<u32>(ABI_CALLER_SAVE >> 32))) {
            continue;
        }
        SpillFpr(fpr);
    }

    const std::array<std::optional<Argument::copyable_reference>, 4> args{arg0, arg1, arg2, arg3};

    // AAPCS64 Next General-purpose Register Number
    int ngrn = 0;
    // AAPCS64 Next SIMD and Floating-point Register Number
    int nsrn = 0;

    for (const auto& maybe_arg : args) {
        if (!maybe_arg) {
            // Gaps are assumed to be in general-purpose registers
            // TODO: should there be a separate list passed for FPRs instead?
            ngrn++;
            continue;
        }

        const Argument& argument = maybe_arg->get();
        if (argument.GetType() == IR::Type::U128) {
            ASSERT(fprs[nsrn].IsCompletelyEmpty());
            LoadCopyInto(argument.value, oaknut::QReg{nsrn});
            nsrn++;
        } else {
            ASSERT(gprs[ngrn].IsCompletelyEmpty());
            LoadCopyInto(argument.value, oaknut::XReg{ngrn});
            ngrn++;
        }
    }
}
|
||||
|
||||
// Defines `inst` as an alias of an existing argument. For an immediate
// argument all uses of `inst` are replaced with the immediate; otherwise
// `inst` is added to the location already holding the argument and that
// location's expected use count grows by the use count of `inst`.
void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
    defined_insts.insert(inst);

    ASSERT(!ValueLocation(inst));

    if (!arg.value.IsImmediate()) {
        HostLocInfo& holder = ValueInfo(arg.value.GetInst());
        holder.values.push_back(inst);
        holder.expected_uses += inst->UseCount();
        return;
    }

    inst->ReplaceUsesWith(arg.value);
}
|
||||
|
||||
// Records that `inst` lives in the given host register. Vector registers map
// to the FPR table, everything else to the GPR table. The target location is
// asserted to be completely empty beforehand.
void RegAlloc::DefineAsRegister(IR::Inst* inst, oaknut::Reg reg) {
    defined_insts.insert(inst);

    ASSERT(!ValueLocation(inst));

    const auto reg_index = reg.index();
    HostLocInfo& info = reg.is_vector() ? fprs[reg_index] : gprs[reg_index];
    ASSERT(info.IsCompletelyEmpty());

    info.values.push_back(inst);
    info.expected_uses += inst->UseCount();
}
|
||||
|
||||
void RegAlloc::UpdateAllUses() {
|
||||
for (auto& gpr : gprs) {
|
||||
gpr.UpdateUses();
|
||||
}
|
||||
for (auto& fpr : fprs) {
|
||||
fpr.UpdateUses();
|
||||
}
|
||||
flags.UpdateUses();
|
||||
for (auto& spill : spills) {
|
||||
spill.UpdateUses();
|
||||
}
|
||||
}
|
||||
|
||||
// Asserts that no host location (GPR, FPR, flags, spill slot) is locked or
// realized.
void RegAlloc::AssertAllUnlocked() const {
    const auto is_unlocked = [](const auto& i) {
        return !(i.locked || i.realized);
    };

    ASSERT(std::all_of(gprs.begin(), gprs.end(), is_unlocked));
    ASSERT(std::all_of(fprs.begin(), fprs.end(), is_unlocked));
    ASSERT(is_unlocked(flags));
    ASSERT(std::all_of(spills.begin(), spills.end(), is_unlocked));
}
|
||||
|
||||
// Asserts that every host location (GPR, FPR, flags, spill slot) is
// completely empty.
void RegAlloc::AssertNoMoreUses() const {
    const auto is_empty = [](const auto& i) -> bool {
        return i.IsCompletelyEmpty();
    };

    ASSERT(std::all_of(gprs.begin(), gprs.end(), is_empty));
    ASSERT(std::all_of(fprs.begin(), fprs.end(), is_empty));
    ASSERT(is_empty(flags));
    ASSERT(std::all_of(spills.begin(), spills.end(), is_empty));
}
|
||||
|
||||
void RegAlloc::EmitVerboseDebuggingOutput() {
|
||||
code.MOV(X19, mcl::bit_cast<u64>(&PrintVerboseDebuggingOutputLine)); // Non-volatile register
|
||||
|
||||
const auto do_location = [&](HostLocInfo& info, HostLocType type, size_t index) {
|
||||
using namespace oaknut::util;
|
||||
for (const IR::Inst* value : info.values) {
|
||||
code.MOV(X0, SP);
|
||||
code.MOV(X1, static_cast<u64>(type));
|
||||
code.MOV(X2, index);
|
||||
code.MOV(X3, value->GetName());
|
||||
code.MOV(X4, static_cast<u64>(value->GetType()));
|
||||
code.BLR(X19);
|
||||
}
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < gprs.size(); i++) {
|
||||
do_location(gprs[i], HostLocType::X, i);
|
||||
}
|
||||
for (size_t i = 0; i < fprs.size(); i++) {
|
||||
do_location(fprs[i], HostLocType::Q, i);
|
||||
}
|
||||
do_location(flags, HostLocType::Nzcv, 0);
|
||||
for (size_t i = 0; i < spills.size(); i++) {
|
||||
do_location(spills[i], HostLocType::Spill, i);
|
||||
}
|
||||
}
|
||||
|
||||
template<HostLoc::Kind kind>
|
||||
int RegAlloc::GenerateImmediate(const IR::Value& value) {
|
||||
ASSERT(value.GetType() != IR::Type::U1);
|
||||
if constexpr (kind == HostLoc::Kind::Gpr) {
|
||||
const int new_location_index = AllocateRegister(gprs, gpr_order);
|
||||
SpillGpr(new_location_index);
|
||||
gprs[new_location_index].SetupScratchLocation();
|
||||
|
||||
code.MOV(oaknut::XReg{new_location_index}, value.GetImmediateAsU64());
|
||||
|
||||
return new_location_index;
|
||||
} else if constexpr (kind == HostLoc::Kind::Fpr) {
|
||||
const int new_location_index = AllocateRegister(fprs, fpr_order);
|
||||
SpillFpr(new_location_index);
|
||||
fprs[new_location_index].SetupScratchLocation();
|
||||
|
||||
code.MOV(Xscratch0, value.GetImmediateAsU64());
|
||||
code.FMOV(oaknut::DReg{new_location_index}, Xscratch0);
|
||||
|
||||
return new_location_index;
|
||||
} else if constexpr (kind == HostLoc::Kind::Flags) {
|
||||
SpillFlags();
|
||||
flags.SetupScratchLocation();
|
||||
|
||||
code.MOV(Xscratch0, value.GetImmediateAsU64());
|
||||
code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
|
||||
|
||||
return 0;
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
|
||||
}
|
||||
}
|
||||
|
||||
template<HostLoc::Kind required_kind>
|
||||
int RegAlloc::RealizeReadImpl(const IR::Value& value) {
|
||||
if (value.IsImmediate()) {
|
||||
return GenerateImmediate<required_kind>(value);
|
||||
}
|
||||
|
||||
const auto current_location = ValueLocation(value.GetInst());
|
||||
ASSERT(current_location);
|
||||
|
||||
if (current_location->kind == required_kind) {
|
||||
ValueInfo(*current_location).realized = true;
|
||||
return current_location->index;
|
||||
}
|
||||
|
||||
ASSERT(!ValueInfo(*current_location).realized);
|
||||
ASSERT(ValueInfo(*current_location).locked);
|
||||
|
||||
if constexpr (required_kind == HostLoc::Kind::Gpr) {
|
||||
const int new_location_index = AllocateRegister(gprs, gpr_order);
|
||||
SpillGpr(new_location_index);
|
||||
|
||||
switch (current_location->kind) {
|
||||
case HostLoc::Kind::Gpr:
|
||||
ASSERT_FALSE("Logic error");
|
||||
break;
|
||||
case HostLoc::Kind::Fpr:
|
||||
code.FMOV(oaknut::XReg{new_location_index}, oaknut::DReg{current_location->index});
|
||||
// ASSERT size fits
|
||||
break;
|
||||
case HostLoc::Kind::Spill:
|
||||
code.LDR(oaknut::XReg{new_location_index}, SP, spill_offset + current_location->index * spill_slot_size);
|
||||
break;
|
||||
case HostLoc::Kind::Flags:
|
||||
code.MRS(oaknut::XReg{new_location_index}, oaknut::SystemReg::NZCV);
|
||||
break;
|
||||
}
|
||||
|
||||
gprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
|
||||
gprs[new_location_index].realized = true;
|
||||
return new_location_index;
|
||||
} else if constexpr (required_kind == HostLoc::Kind::Fpr) {
|
||||
const int new_location_index = AllocateRegister(fprs, fpr_order);
|
||||
SpillFpr(new_location_index);
|
||||
|
||||
switch (current_location->kind) {
|
||||
case HostLoc::Kind::Gpr:
|
||||
code.FMOV(oaknut::DReg{new_location_index}, oaknut::XReg{current_location->index});
|
||||
break;
|
||||
case HostLoc::Kind::Fpr:
|
||||
ASSERT_FALSE("Logic error");
|
||||
break;
|
||||
case HostLoc::Kind::Spill:
|
||||
code.LDR(oaknut::QReg{new_location_index}, SP, spill_offset + current_location->index * spill_slot_size);
|
||||
break;
|
||||
case HostLoc::Kind::Flags:
|
||||
ASSERT_FALSE("Moving from flags into fprs is not currently supported");
|
||||
break;
|
||||
}
|
||||
|
||||
fprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
|
||||
fprs[new_location_index].realized = true;
|
||||
return new_location_index;
|
||||
} else if constexpr (required_kind == HostLoc::Kind::Flags) {
|
||||
ASSERT_FALSE("A simple read from flags is likely a logic error.");
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<required_kind>>);
|
||||
}
|
||||
}
|
||||
|
||||
template<HostLoc::Kind kind>
|
||||
int RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
|
||||
defined_insts.insert(value);
|
||||
|
||||
ASSERT(!ValueLocation(value));
|
||||
|
||||
if constexpr (kind == HostLoc::Kind::Gpr) {
|
||||
const int new_location_index = AllocateRegister(gprs, gpr_order);
|
||||
SpillGpr(new_location_index);
|
||||
gprs[new_location_index].SetupLocation(value);
|
||||
return new_location_index;
|
||||
} else if constexpr (kind == HostLoc::Kind::Fpr) {
|
||||
const int new_location_index = AllocateRegister(fprs, fpr_order);
|
||||
SpillFpr(new_location_index);
|
||||
fprs[new_location_index].SetupLocation(value);
|
||||
return new_location_index;
|
||||
} else if constexpr (kind == HostLoc::Kind::Flags) {
|
||||
SpillFlags();
|
||||
flags.SetupLocation(value);
|
||||
return 0;
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
|
||||
}
|
||||
}
|
||||
|
||||
template<HostLoc::Kind kind>
|
||||
int RegAlloc::RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* write_value) {
|
||||
defined_insts.insert(write_value);
|
||||
|
||||
// TODO: Move elimination
|
||||
|
||||
const int write_loc = RealizeWriteImpl<kind>(write_value);
|
||||
|
||||
if constexpr (kind == HostLoc::Kind::Gpr) {
|
||||
LoadCopyInto(read_value, oaknut::XReg{write_loc});
|
||||
return write_loc;
|
||||
} else if constexpr (kind == HostLoc::Kind::Fpr) {
|
||||
LoadCopyInto(read_value, oaknut::QReg{write_loc});
|
||||
return write_loc;
|
||||
} else if constexpr (kind == HostLoc::Kind::Flags) {
|
||||
ASSERT_FALSE("Incorrect function for ReadWrite of flags");
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
|
||||
}
|
||||
}
|
||||
|
||||
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Gpr>(const IR::Value& value);
|
||||
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Fpr>(const IR::Value& value);
|
||||
template int RegAlloc::RealizeReadImpl<HostLoc::Kind::Flags>(const IR::Value& value);
|
||||
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Gpr>(const IR::Inst* value);
|
||||
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Fpr>(const IR::Inst* value);
|
||||
template int RegAlloc::RealizeWriteImpl<HostLoc::Kind::Flags>(const IR::Inst* value);
|
||||
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Gpr>(const IR::Value&, const IR::Inst*);
|
||||
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Fpr>(const IR::Value&, const IR::Inst*);
|
||||
template int RegAlloc::RealizeReadWriteImpl<HostLoc::Kind::Flags>(const IR::Value&, const IR::Inst*);
|
||||
|
||||
int RegAlloc::AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<int>& order) const {
|
||||
const auto empty = std::find_if(order.begin(), order.end(), [&](int i) { return regs[i].IsCompletelyEmpty(); });
|
||||
if (empty != order.end()) {
|
||||
return *empty;
|
||||
}
|
||||
|
||||
std::vector<int> candidates;
|
||||
std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](int i) { return regs[i].MaybeAllocatable(); });
|
||||
|
||||
// TODO: LRU
|
||||
std::uniform_int_distribution<size_t> dis{0, candidates.size() - 1};
|
||||
return candidates[dis(rand_gen)];
|
||||
}
|
||||
|
||||
void RegAlloc::SpillGpr(int index) {
|
||||
ASSERT(!gprs[index].locked && !gprs[index].realized);
|
||||
if (gprs[index].values.empty()) {
|
||||
return;
|
||||
}
|
||||
const int new_location_index = FindFreeSpill();
|
||||
code.STR(oaknut::XReg{index}, SP, spill_offset + new_location_index * spill_slot_size);
|
||||
spills[new_location_index] = std::exchange(gprs[index], {});
|
||||
}
|
||||
|
||||
void RegAlloc::SpillFpr(int index) {
|
||||
ASSERT(!fprs[index].locked && !fprs[index].realized);
|
||||
if (fprs[index].values.empty()) {
|
||||
return;
|
||||
}
|
||||
const int new_location_index = FindFreeSpill();
|
||||
code.STR(oaknut::QReg{index}, SP, spill_offset + new_location_index * spill_slot_size);
|
||||
spills[new_location_index] = std::exchange(fprs[index], {});
|
||||
}
|
||||
|
||||
// Brings the value of `read` into the host NZCV flags and, when `write` is
// non-null, registers the flags as the (not yet realized) home of `write`.
//  - Already in flags: the flags are spilled unless this is the value's one
//    remaining use.
//  - In a GPR: any other flags contents are spilled, then MSR NZCV from the
//    register.
//  - In a spill slot: any other flags contents are spilled, then the slot is
//    loaded into Wscratch0 and written to NZCV.
//  - Any other location is rejected by assertion.
void RegAlloc::ReadWriteFlags(Argument& read, IR::Inst* write) {
    defined_insts.insert(write);

    const auto current_location = ValueLocation(read.value.GetInst());
    ASSERT(current_location);

    switch (current_location->kind) {
    case HostLoc::Kind::Flags:
        if (!flags.IsOneRemainingUse()) {
            SpillFlags();
        }
        break;
    case HostLoc::Kind::Gpr:
        if (!flags.values.empty()) {
            SpillFlags();
        }
        code.MSR(oaknut::SystemReg::NZCV, oaknut::XReg{current_location->index});
        break;
    case HostLoc::Kind::Spill:
        if (!flags.values.empty()) {
            SpillFlags();
        }
        code.LDR(Wscratch0, SP, spill_offset + current_location->index * spill_slot_size);
        code.MSR(oaknut::SystemReg::NZCV, Xscratch0);
        break;
    default:
        ASSERT_FALSE("Invalid current location for flags");
        break;
    }

    if (write) {
        flags.SetupLocation(write);
        flags.realized = false;
    }
}
|
||||
|
||||
void RegAlloc::SpillFlags() {
|
||||
ASSERT(!flags.locked && !flags.realized);
|
||||
if (flags.values.empty()) {
|
||||
return;
|
||||
}
|
||||
const int new_location_index = AllocateRegister(gprs, gpr_order);
|
||||
SpillGpr(new_location_index);
|
||||
code.MRS(oaknut::XReg{new_location_index}, oaknut::SystemReg::NZCV);
|
||||
gprs[new_location_index] = std::exchange(flags, {});
|
||||
}
|
||||
|
||||
int RegAlloc::FindFreeSpill() const {
|
||||
const auto iter = std::find_if(spills.begin(), spills.end(), [](const HostLocInfo& info) { return info.values.empty(); });
|
||||
ASSERT_MSG(iter != spills.end(), "All spill locations are full");
|
||||
return static_cast<int>(iter - spills.begin());
|
||||
}
|
||||
|
||||
// Emits code that copies `value` into the general-purpose register `reg`
// without changing where the value is tracked.
//  - Immediate: MOV of the 64-bit immediate.
//  - Gpr:       register-to-register MOV.
//  - Fpr:       FMOV from the D view of the source register.
//  - Spill:     LDR from the spill slot (relative to SP).
//  - Flags:     MRS from NZCV.
void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::XReg reg) {
    if (value.IsImmediate()) {
        code.MOV(reg, value.GetImmediateAsU64());
        return;
    }

    const auto current_location = ValueLocation(value.GetInst());
    ASSERT(current_location);

    const int source = current_location->index;
    switch (current_location->kind) {
    case HostLoc::Kind::Gpr:
        code.MOV(reg, oaknut::XReg{source});
        break;
    case HostLoc::Kind::Fpr:
        code.FMOV(reg, oaknut::DReg{source});
        // ASSERT size fits
        break;
    case HostLoc::Kind::Spill:
        code.LDR(reg, SP, spill_offset + source * spill_slot_size);
        break;
    case HostLoc::Kind::Flags:
        code.MRS(reg, oaknut::SystemReg::NZCV);
        break;
    }
}
|
||||
|
||||
// Emits code that copies `value` into the vector register `reg` without
// changing where the value is tracked.
//  - Immediate: MOV into Xscratch0, then FMOV into the D view of `reg`.
//  - Gpr:       FMOV from the X register into the D view of `reg`.
//  - Fpr:       full 16-byte vector MOV.
//  - Spill:     LDR of the Q register from the spill slot (relative to SP).
//  - Flags:     not supported; rejected by assertion.
void RegAlloc::LoadCopyInto(const IR::Value& value, oaknut::QReg reg) {
    if (value.IsImmediate()) {
        code.MOV(Xscratch0, value.GetImmediateAsU64());
        code.FMOV(reg.toD(), Xscratch0);
        return;
    }

    const auto current_location = ValueLocation(value.GetInst());
    ASSERT(current_location);

    const int source = current_location->index;
    switch (current_location->kind) {
    case HostLoc::Kind::Gpr:
        code.FMOV(reg.toD(), oaknut::XReg{source});
        break;
    case HostLoc::Kind::Fpr:
        code.MOV(reg.B16(), oaknut::QReg{source}.B16());
        break;
    case HostLoc::Kind::Spill:
        // TODO: Minimize move size to max value width
        code.LDR(reg, SP, spill_offset + source * spill_slot_size);
        break;
    case HostLoc::Kind::Flags:
        ASSERT_FALSE("Moving from flags into fprs is not currently supported");
        break;
    }
}
|
||||
|
||||
// Finds the host location currently holding `value`. Locations are searched
// in the order GPRs, FPRs, flags, spill slots; the first match is returned.
// Returns std::nullopt when no location holds the value.
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
    // Index of the first entry of `locations` that contains `value`, if any.
    const auto index_in = [value](const auto& locations) -> std::optional<int> {
        for (size_t i = 0; i < locations.size(); ++i) {
            if (locations[i].Contains(value)) {
                return static_cast<int>(i);
            }
        }
        return std::nullopt;
    };

    if (const auto gpr = index_in(gprs)) {
        return HostLoc{HostLoc::Kind::Gpr, *gpr};
    }
    if (const auto fpr = index_in(fprs)) {
        return HostLoc{HostLoc::Kind::Fpr, *fpr};
    }
    if (flags.Contains(value)) {
        return HostLoc{HostLoc::Kind::Flags, 0};
    }
    if (const auto spill = index_in(spills)) {
        return HostLoc{HostLoc::Kind::Spill, *spill};
    }
    return std::nullopt;
}
|
||||
|
||||
// Returns the bookkeeping entry for the given host location. The index is
// ignored for the flags, which have a single entry. An unknown kind is
// rejected by assertion.
HostLocInfo& RegAlloc::ValueInfo(HostLoc host_loc) {
    const auto slot = static_cast<size_t>(host_loc.index);

    switch (host_loc.kind) {
    case HostLoc::Kind::Gpr:
        return gprs[slot];
    case HostLoc::Kind::Fpr:
        return fprs[slot];
    case HostLoc::Kind::Flags:
        return flags;
    case HostLoc::Kind::Spill:
        return spills[slot];
    }
    ASSERT_FALSE("RegAlloc::ValueInfo: Invalid HostLoc::Kind");
}
|
||||
|
||||
// Returns the bookkeeping entry of the location currently holding `value`.
// Locations are searched in the order GPRs, FPRs, flags, spill slots. A value
// that is held nowhere is rejected by assertion.
HostLocInfo& RegAlloc::ValueInfo(const IR::Inst* value) {
    for (HostLocInfo& info : gprs) {
        if (info.Contains(value)) {
            return info;
        }
    }
    for (HostLocInfo& info : fprs) {
        if (info.Contains(value)) {
            return info;
        }
    }
    if (flags.Contains(value)) {
        return flags;
    }
    for (HostLocInfo& info : spills) {
        if (info.Contains(value)) {
            return info;
        }
    }
    ASSERT_FALSE("RegAlloc::ValueInfo: Value not found");
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
379
src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h
Normal file
379
src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h
Normal file
|
|
@ -0,0 +1,379 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <random>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <mcl/type_traits/is_instance_of_template.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/backend/arm64/stack_layout.h"
|
||||
#include "dynarmic/ir/cond.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/value.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
class FpsrManager;
|
||||
class RegAlloc;
|
||||
|
||||
// Identifies one host location: which class of storage it belongs to and its
// index within that class. The member order (kind, index) is relied upon by
// aggregate initialization such as HostLoc{kind, index}.
struct HostLoc final {
    enum class Kind {
        Gpr,    // general-purpose register
        Fpr,    // floating-point / vector register
        Flags,  // the NZCV flags (index is always 0)
        Spill,  // spill slot on the stack
    };

    Kind kind;
    int index;
};
|
||||
|
||||
// Access mode requested from the register allocator for an RAReg.
enum RWType {
    Void,       // no access; the state a moved-from RAReg is left in
    Read,       // read an existing value
    Write,      // define a new value
    ReadWrite,  // read one value and define another in the same register
};
|
||||
|
||||
struct Argument final {
|
||||
public:
|
||||
using copyable_reference = std::reference_wrapper<Argument>;
|
||||
|
||||
IR::Type GetType() const;
|
||||
bool IsVoid() const { return GetType() == IR::Type::Void; }
|
||||
bool IsImmediate() const;
|
||||
|
||||
bool GetImmediateU1() const;
|
||||
u8 GetImmediateU8() const;
|
||||
u16 GetImmediateU16() const;
|
||||
u32 GetImmediateU32() const;
|
||||
u64 GetImmediateU64() const;
|
||||
IR::Cond GetImmediateCond() const;
|
||||
IR::AccType GetImmediateAccType() const;
|
||||
|
||||
// Only valid if not immediate
|
||||
HostLoc::Kind CurrentLocationKind() const;
|
||||
bool IsInGpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Gpr; }
|
||||
bool IsInFpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Fpr; }
|
||||
|
||||
private:
|
||||
friend class RegAlloc;
|
||||
explicit Argument(RegAlloc& reg_alloc)
|
||||
: reg_alloc{reg_alloc} {}
|
||||
|
||||
bool allocated = false;
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Value value;
|
||||
};
|
||||
|
||||
// Tag type used as the template argument of RAReg to stand for the NZCV
// flags. It can only be constructed and queried by RAReg; its index is
// always 0.
struct FlagsTag final {
private:
    template<typename>
    friend struct RAReg;

    explicit FlagsTag(int) {}

    int index() const {
        return 0;
    }
};
|
||||
|
||||
template<typename T>
|
||||
struct RAReg final {
|
||||
public:
|
||||
static constexpr HostLoc::Kind kind = !std::is_same_v<FlagsTag, T>
|
||||
? std::is_base_of_v<oaknut::VReg, T>
|
||||
? HostLoc::Kind::Fpr
|
||||
: HostLoc::Kind::Gpr
|
||||
: HostLoc::Kind::Flags;
|
||||
|
||||
operator T() const { return reg.value(); }
|
||||
|
||||
operator oaknut::WRegWsp() const
|
||||
requires(std::is_same_v<T, oaknut::WReg>)
|
||||
{
|
||||
return reg.value();
|
||||
}
|
||||
|
||||
operator oaknut::XRegSp() const
|
||||
requires(std::is_same_v<T, oaknut::XReg>)
|
||||
{
|
||||
return reg.value();
|
||||
}
|
||||
|
||||
T operator*() const { return reg.value(); }
|
||||
const T* operator->() const { return ®.value(); }
|
||||
|
||||
~RAReg();
|
||||
RAReg(RAReg&& other)
|
||||
: reg_alloc{other.reg_alloc}
|
||||
, rw{std::exchange(other.rw, RWType::Void)}
|
||||
, read_value{std::exchange(other.read_value, {})}
|
||||
, write_value{std::exchange(other.write_value, nullptr)}
|
||||
, reg{std::exchange(other.reg, std::nullopt)} {
|
||||
}
|
||||
RAReg& operator=(RAReg&&) = delete;
|
||||
|
||||
private:
|
||||
friend class RegAlloc;
|
||||
explicit RAReg(RegAlloc& reg_alloc, RWType rw, const IR::Value& read_value, const IR::Inst* write_value);
|
||||
|
||||
RAReg(const RAReg&) = delete;
|
||||
RAReg& operator=(const RAReg&) = delete;
|
||||
|
||||
void Realize();
|
||||
|
||||
RegAlloc& reg_alloc;
|
||||
RWType rw;
|
||||
IR::Value read_value;
|
||||
const IR::Inst* write_value;
|
||||
std::optional<T> reg;
|
||||
};
|
||||
|
||||
struct HostLocInfo final {
|
||||
std::vector<const IR::Inst*> values;
|
||||
size_t locked = 0;
|
||||
bool realized = false;
|
||||
size_t uses_this_inst = 0;
|
||||
size_t accumulated_uses = 0;
|
||||
size_t expected_uses = 0;
|
||||
|
||||
bool Contains(const IR::Inst*) const;
|
||||
void SetupScratchLocation();
|
||||
void SetupLocation(const IR::Inst*);
|
||||
bool IsCompletelyEmpty() const;
|
||||
bool MaybeAllocatable() const;
|
||||
bool IsOneRemainingUse() const;
|
||||
void UpdateUses();
|
||||
};
|
||||
|
||||
class RegAlloc final {
|
||||
public:
|
||||
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
|
||||
|
||||
explicit RegAlloc(oaknut::CodeGenerator& code, FpsrManager& fpsr_manager, std::vector<int> gpr_order, std::vector<int> fpr_order)
|
||||
: code{code}, fpsr_manager{fpsr_manager}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {}
|
||||
|
||||
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
|
||||
bool WasValueDefined(IR::Inst* inst) const;
|
||||
|
||||
auto ReadX(Argument& arg) { return RAReg<oaknut::XReg>{*this, RWType::Read, arg.value, nullptr}; }
|
||||
auto ReadW(Argument& arg) { return RAReg<oaknut::WReg>{*this, RWType::Read, arg.value, nullptr}; }
|
||||
|
||||
auto ReadQ(Argument& arg) { return RAReg<oaknut::QReg>{*this, RWType::Read, arg.value, nullptr}; }
|
||||
auto ReadD(Argument& arg) { return RAReg<oaknut::DReg>{*this, RWType::Read, arg.value, nullptr}; }
|
||||
auto ReadS(Argument& arg) { return RAReg<oaknut::SReg>{*this, RWType::Read, arg.value, nullptr}; }
|
||||
auto ReadH(Argument& arg) { return RAReg<oaknut::HReg>{*this, RWType::Read, arg.value, nullptr}; }
|
||||
auto ReadB(Argument& arg) { return RAReg<oaknut::BReg>{*this, RWType::Read, arg.value, nullptr}; }
|
||||
|
||||
template<size_t size>
|
||||
auto ReadReg(Argument& arg) {
|
||||
if constexpr (size == 64) {
|
||||
return ReadX(arg);
|
||||
} else if constexpr (size == 32) {
|
||||
return ReadW(arg);
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid size to ReadReg {}", size);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t size>
|
||||
auto ReadVec(Argument& arg) {
|
||||
if constexpr (size == 128) {
|
||||
return ReadQ(arg);
|
||||
} else if constexpr (size == 64) {
|
||||
return ReadD(arg);
|
||||
} else if constexpr (size == 32) {
|
||||
return ReadS(arg);
|
||||
} else if constexpr (size == 16) {
|
||||
return ReadH(arg);
|
||||
} else if constexpr (size == 8) {
|
||||
return ReadB(arg);
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid size to ReadVec {}", size);
|
||||
}
|
||||
}
|
||||
|
||||
auto WriteX(IR::Inst* inst) { return RAReg<oaknut::XReg>{*this, RWType::Write, {}, inst}; }
|
||||
auto WriteW(IR::Inst* inst) { return RAReg<oaknut::WReg>{*this, RWType::Write, {}, inst}; }
|
||||
|
||||
auto WriteQ(IR::Inst* inst) { return RAReg<oaknut::QReg>{*this, RWType::Write, {}, inst}; }
|
||||
auto WriteD(IR::Inst* inst) { return RAReg<oaknut::DReg>{*this, RWType::Write, {}, inst}; }
|
||||
auto WriteS(IR::Inst* inst) { return RAReg<oaknut::SReg>{*this, RWType::Write, {}, inst}; }
|
||||
auto WriteH(IR::Inst* inst) { return RAReg<oaknut::HReg>{*this, RWType::Write, {}, inst}; }
|
||||
auto WriteB(IR::Inst* inst) { return RAReg<oaknut::BReg>{*this, RWType::Write, {}, inst}; }
|
||||
|
||||
auto WriteFlags(IR::Inst* inst) { return RAReg<FlagsTag>{*this, RWType::Write, {}, inst}; }
|
||||
|
||||
template<size_t size>
|
||||
auto WriteReg(IR::Inst* inst) {
|
||||
if constexpr (size == 64) {
|
||||
return WriteX(inst);
|
||||
} else if constexpr (size == 32) {
|
||||
return WriteW(inst);
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid size to WriteReg {}", size);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t size>
|
||||
auto WriteVec(IR::Inst* inst) {
|
||||
if constexpr (size == 128) {
|
||||
return WriteQ(inst);
|
||||
} else if constexpr (size == 64) {
|
||||
return WriteD(inst);
|
||||
} else if constexpr (size == 32) {
|
||||
return WriteS(inst);
|
||||
} else if constexpr (size == 16) {
|
||||
return WriteH(inst);
|
||||
} else if constexpr (size == 8) {
|
||||
return WriteB(inst);
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid size to WriteVec {}", size);
|
||||
}
|
||||
}
|
||||
|
||||
// ReadWrite-mode requests: each builds an RAReg of the named register type that reads
// `arg.value` and attributes its result to `inst`.
auto ReadWriteX(Argument& arg, const IR::Inst* inst) {
    return RAReg<oaknut::XReg>{*this, RWType::ReadWrite, arg.value, inst};
}
auto ReadWriteW(Argument& arg, const IR::Inst* inst) {
    return RAReg<oaknut::WReg>{*this, RWType::ReadWrite, arg.value, inst};
}

auto ReadWriteQ(Argument& arg, const IR::Inst* inst) {
    return RAReg<oaknut::QReg>{*this, RWType::ReadWrite, arg.value, inst};
}
auto ReadWriteD(Argument& arg, const IR::Inst* inst) {
    return RAReg<oaknut::DReg>{*this, RWType::ReadWrite, arg.value, inst};
}
auto ReadWriteS(Argument& arg, const IR::Inst* inst) {
    return RAReg<oaknut::SReg>{*this, RWType::ReadWrite, arg.value, inst};
}
auto ReadWriteH(Argument& arg, const IR::Inst* inst) {
    return RAReg<oaknut::HReg>{*this, RWType::ReadWrite, arg.value, inst};
}
auto ReadWriteB(Argument& arg, const IR::Inst* inst) {
    return RAReg<oaknut::BReg>{*this, RWType::ReadWrite, arg.value, inst};
}
|
||||
|
||||
template<size_t size>
|
||||
auto ReadWriteReg(Argument& arg, const IR::Inst* inst) {
|
||||
if constexpr (size == 64) {
|
||||
return ReadWriteX(arg, inst);
|
||||
} else if constexpr (size == 32) {
|
||||
return ReadWriteW(arg, inst);
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid size to ReadWriteReg {}", size);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t size>
|
||||
auto ReadWriteVec(Argument& arg, const IR::Inst* inst) {
|
||||
if constexpr (size == 128) {
|
||||
return ReadWriteQ(arg, inst);
|
||||
} else if constexpr (size == 64) {
|
||||
return ReadWriteD(arg, inst);
|
||||
} else if constexpr (size == 32) {
|
||||
return ReadWriteS(arg, inst);
|
||||
} else if constexpr (size == 16) {
|
||||
return ReadWriteH(arg, inst);
|
||||
} else if constexpr (size == 8) {
|
||||
return ReadWriteB(arg, inst);
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid size to ReadWriteVec {}", size);
|
||||
}
|
||||
}
|
||||
|
||||
void PrepareForCall(std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}, std::optional<Argument::copyable_reference> arg2 = {}, std::optional<Argument::copyable_reference> arg3 = {});
|
||||
|
||||
void DefineAsExisting(IR::Inst* inst, Argument& arg);
|
||||
void DefineAsRegister(IR::Inst* inst, oaknut::Reg reg);
|
||||
|
||||
void ReadWriteFlags(Argument& read, IR::Inst* write);
|
||||
void SpillFlags();
|
||||
void SpillAll();
|
||||
|
||||
template<typename... Ts>
|
||||
static void Realize(Ts&... rs) {
|
||||
static_assert((mcl::is_instance_of_template<RAReg, Ts>() && ...));
|
||||
(rs.Realize(), ...);
|
||||
}
|
||||
|
||||
void UpdateAllUses();
|
||||
void AssertAllUnlocked() const;
|
||||
void AssertNoMoreUses() const;
|
||||
|
||||
void EmitVerboseDebuggingOutput();
|
||||
|
||||
private:
|
||||
friend struct Argument;
|
||||
template<typename>
|
||||
friend struct RAReg;
|
||||
|
||||
template<HostLoc::Kind kind>
|
||||
int GenerateImmediate(const IR::Value& value);
|
||||
template<HostLoc::Kind kind>
|
||||
int RealizeReadImpl(const IR::Value& value);
|
||||
template<HostLoc::Kind kind>
|
||||
int RealizeWriteImpl(const IR::Inst* value);
|
||||
template<HostLoc::Kind kind>
|
||||
int RealizeReadWriteImpl(const IR::Value& read_value, const IR::Inst* write_value);
|
||||
|
||||
int AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<int>& order) const;
|
||||
void SpillGpr(int index);
|
||||
void SpillFpr(int index);
|
||||
int FindFreeSpill() const;
|
||||
|
||||
void LoadCopyInto(const IR::Value& value, oaknut::XReg reg);
|
||||
void LoadCopyInto(const IR::Value& value, oaknut::QReg reg);
|
||||
|
||||
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
|
||||
HostLocInfo& ValueInfo(HostLoc host_loc);
|
||||
HostLocInfo& ValueInfo(const IR::Inst* value);
|
||||
|
||||
oaknut::CodeGenerator& code;
|
||||
FpsrManager& fpsr_manager;
|
||||
std::vector<int> gpr_order;
|
||||
std::vector<int> fpr_order;
|
||||
|
||||
std::array<HostLocInfo, 32> gprs;
|
||||
std::array<HostLocInfo, 32> fprs;
|
||||
HostLocInfo flags;
|
||||
std::array<HostLocInfo, SpillCount> spills;
|
||||
|
||||
mutable std::mt19937 rand_gen;
|
||||
|
||||
ankerl::unordered_dense::set<const IR::Inst*> defined_insts;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
RAReg<T>::RAReg(RegAlloc& reg_alloc, RWType rw, const IR::Value& read_value, const IR::Inst* write_value)
|
||||
: reg_alloc{reg_alloc}, rw{rw}, read_value{read_value}, write_value{write_value} {
|
||||
if (rw != RWType::Write && !read_value.IsImmediate()) {
|
||||
reg_alloc.ValueInfo(read_value.GetInst()).locked++;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
RAReg<T>::~RAReg() {
|
||||
if (rw != RWType::Write && !read_value.IsImmediate()) {
|
||||
reg_alloc.ValueInfo(read_value.GetInst()).locked--;
|
||||
}
|
||||
if (reg) {
|
||||
reg_alloc.ValueInfo(HostLoc{kind, reg->index()}).realized = false;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void RAReg<T>::Realize() {
|
||||
switch (rw) {
|
||||
case RWType::Read:
|
||||
reg = T{reg_alloc.RealizeReadImpl<kind>(read_value)};
|
||||
break;
|
||||
case RWType::Write:
|
||||
reg = T{reg_alloc.RealizeWriteImpl<kind>(write_value)};
|
||||
break;
|
||||
case RWType::ReadWrite:
|
||||
reg = T{reg_alloc.RealizeReadWriteImpl<kind>(read_value, write_value)};
|
||||
break;
|
||||
default:
|
||||
ASSERT_FALSE("Invalid RWType");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
52
src/dynarmic/src/dynarmic/backend/arm64/stack_layout.h
Normal file
52
src/dynarmic/src/dynarmic/backend/arm64/stack_layout.h
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(push)
|
||||
# pragma warning(disable : 4324) // Structure was padded due to alignment specifier
|
||||
#endif
|
||||
|
||||
constexpr size_t SpillCount = 64;
|
||||
|
||||
struct alignas(16) RSBEntry {
|
||||
u64 target;
|
||||
u64 code_ptr;
|
||||
};
|
||||
|
||||
constexpr size_t RSBCount = 8;
|
||||
constexpr u64 RSBIndexMask = (RSBCount - 1) * sizeof(RSBEntry);
|
||||
|
||||
// Fixed-layout, 16-byte-aligned block of per-run state.
// A static_assert further down this header requires sizeof(StackLayout) to be a multiple of 16.
// NOTE(review): field order is presumably relied upon by emitted code via offsetof(); do not reorder.
struct alignas(16) StackLayout {
|
||||
// RSBCount entries of {target, code_ptr} pairs.
std::array<RSBEntry, RSBCount> rsb;
|
||||
|
||||
// SpillCount slots, each two u64 wide.
std::array<std::array<u64, 2>, SpillCount> spill;
|
||||
|
||||
// NOTE(review): presumably a position within `rsb` (see RSBIndexMask) - confirm against the emitter.
u32 rsb_ptr;
|
||||
|
||||
s64 cycles_to_run;
|
||||
|
||||
u32 save_host_fpcr;
|
||||
|
||||
bool check_bit;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(pop)
|
||||
#endif
|
||||
|
||||
static_assert(sizeof(StackLayout) % 16 == 0);
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2023 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/arm64/verbose_debugging_output.h"
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "dynarmic/backend/arm64/emit_context.h"
|
||||
#include "dynarmic/ir/type.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Emits code that saves X0-X29 (except X18), Q0-Q31, NZCV, FPSR and the address of the
// spill area into a RegisterData block carved out below SP, lets the register allocator
// emit its own debugging output, then restores everything in reverse order and releases the block.
void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx) {
    constexpr int gpr_count = 30;
    constexpr int fpr_count = 32;
    constexpr int platform_register = 18;

    const auto gpr_slot = [](int i) { return offsetof(RegisterData, x) + i * sizeof(u64); };
    const auto fpr_slot = [](int i) { return offsetof(RegisterData, q) + i * sizeof(Vector); };

    // Reserve the RegisterData block.
    code.SUB(SP, SP, sizeof(RegisterData));

    // Save general-purpose registers, leaving the platform register untouched.
    for (int i = 0; i < gpr_count; ++i) {
        if (i != platform_register) {
            code.STR(oaknut::XReg{i}, SP, gpr_slot(i));
        }
    }

    // Save vector registers.
    for (int i = 0; i < fpr_count; ++i) {
        code.STR(oaknut::QReg{i}, SP, fpr_slot(i));
    }

    // X0 has already been saved above, so it is free to use as scratch.
    code.MRS(X0, oaknut::SystemReg::NZCV);
    code.STR(X0, SP, offsetof(RegisterData, nzcv));
    code.ADD(X0, SP, sizeof(RegisterData) + offsetof(StackLayout, spill));
    code.STR(X0, SP, offsetof(RegisterData, spill));
    code.MRS(X0, oaknut::SystemReg::FPSR);
    code.STR(X0, SP, offsetof(RegisterData, fpsr));

    ctx.reg_alloc.EmitVerboseDebuggingOutput();

    // Restore status registers first (X0 is still scratch), then the register files.
    code.LDR(X0, SP, offsetof(RegisterData, fpsr));
    code.MSR(oaknut::SystemReg::FPSR, X0);
    code.LDR(X0, SP, offsetof(RegisterData, nzcv));
    code.MSR(oaknut::SystemReg::NZCV, X0);

    for (int i = 0; i < fpr_count; ++i) {
        code.LDR(oaknut::QReg{i}, SP, fpr_slot(i));
    }

    for (int i = 0; i < gpr_count; ++i) {
        if (i != platform_register) {
            code.LDR(oaknut::XReg{i}, SP, gpr_slot(i));
        }
    }

    // Release the RegisterData block.
    code.ADD(SP, SP, sizeof(RegisterData));
}
|
||||
|
||||
// Prints one "dynarmic debug: %NNNNN = <hex>" line for instruction `inst_index`.
// The value is fetched from `reg_data` according to `reg_type`/`reg_index` and rendered
// with a width chosen by `inst_type`. Unknown register or instruction types print a marker instead.
void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLocType reg_type, size_t reg_index, size_t inst_index, IR::Type inst_type) {
    fmt::print("dynarmic debug: %{:05} = ", inst_index);

    // Fetch the raw value; scalar sources occupy the low element only.
    Vector value{0, 0};
    switch (reg_type) {
    case HostLocType::X:
        value = Vector{reg_data.x[reg_index], 0};
        break;
    case HostLocType::Q:
        value = reg_data.q[reg_index];
        break;
    case HostLocType::Nzcv:
        value = Vector{reg_data.nzcv, 0};
        break;
    case HostLocType::Spill:
        value = (*reg_data.spill)[reg_index];
        break;
    default:
        fmt::print("invalid reg_type! ");
        break;
    }

    const u64 low = value[0];
    const u64 high = value[1];

    switch (inst_type) {
    case IR::Type::U1:
    case IR::Type::U8:
        fmt::print("{:02x}", low & 0xff);
        break;
    case IR::Type::U16:
        fmt::print("{:04x}", low & 0xffff);
        break;
    case IR::Type::U32:
    case IR::Type::NZCVFlags:
        fmt::print("{:08x}", low & 0xffffffff);
        break;
    case IR::Type::U64:
        fmt::print("{:016x}", low);
        break;
    case IR::Type::U128:
        // High half first so the 128-bit value reads as one big-endian hex number.
        fmt::print("{:016x}{:016x}", high, low);
        break;
    case IR::Type::A32Reg:
    case IR::Type::A32ExtReg:
    case IR::Type::A64Reg:
    case IR::Type::A64Vec:
    case IR::Type::CoprocInfo:
    case IR::Type::Cond:
    case IR::Type::Void:
    case IR::Type::Table:
    case IR::Type::AccType:
    case IR::Type::Opaque:
    default:
        fmt::print("invalid inst_type!");
        break;
    }

    fmt::print("\n");
}
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2023 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/backend/arm64/stack_layout.h"
|
||||
|
||||
namespace oaknut {
|
||||
struct CodeGenerator;
|
||||
struct Label;
|
||||
} // namespace oaknut
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
enum class Type : u16;
|
||||
} // namespace Dynarmic::IR
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
struct EmitContext;
|
||||
|
||||
using Vector = std::array<u64, 2>;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(push)
|
||||
# pragma warning(disable : 4324) // Structure was padded due to alignment specifier
|
||||
#endif
|
||||
|
||||
enum class HostLocType {
|
||||
X,
|
||||
Q,
|
||||
Nzcv,
|
||||
Spill,
|
||||
};
|
||||
|
||||
// Snapshot of host register state filled in by the code that EmitVerboseDebuggingOutput emits
// and read back by PrintVerboseDebuggingOutputLine. The emitter addresses fields via offsetof(),
// so the layout must stay in sync with it.
struct alignas(16) RegisterData {
|
||||
// X0..X29, indexed by register number (the emitter skips index 18, the platform register).
std::array<u64, 30> x;
|
||||
// Q0..Q31, indexed by register number.
std::array<Vector, 32> q;
|
||||
// Value read from the NZCV system register.
u32 nzcv;
|
||||
// Address of the StackLayout spill array, computed by the emitter relative to SP.
decltype(StackLayout::spill)* spill;
|
||||
// Value read from the FPSR system register.
u32 fpsr;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(pop)
|
||||
#endif
|
||||
|
||||
void EmitVerboseDebuggingOutput(oaknut::CodeGenerator& code, EmitContext& ctx);
|
||||
void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLocType reg_type, size_t reg_index, size_t inst_index, IR::Type inst_type);
|
||||
|
||||
} // namespace Dynarmic::Backend::Arm64
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/block_range_information.h"
|
||||
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
namespace Dynarmic::Backend {
|
||||
|
||||
template<typename ProgramCounterType>
|
||||
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
|
||||
block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
|
||||
}
|
||||
|
||||
// Forgets every recorded range by clearing the interval map.
template<typename ProgramCounterType>
|
||||
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
|
||||
block_ranges.clear();
|
||||
}
|
||||
|
||||
template<typename ProgramCounterType>
|
||||
ankerl::unordered_dense::set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
|
||||
ankerl::unordered_dense::set<IR::LocationDescriptor> erase_locations;
|
||||
for (auto invalidate_interval : ranges) {
|
||||
auto pair = block_ranges.equal_range(invalidate_interval);
|
||||
for (auto it = pair.first; it != pair.second; ++it) {
|
||||
for (const auto& descriptor : it->second) {
|
||||
erase_locations.insert(descriptor);
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: EFFICIENCY: Remove ranges that are to be erased.
|
||||
return erase_locations;
|
||||
}
|
||||
|
||||
template class BlockRangeInformation<u32>;
|
||||
template class BlockRangeInformation<u64>;
|
||||
|
||||
} // namespace Dynarmic::Backend
|
||||
29
src/dynarmic/src/dynarmic/backend/block_range_information.h
Normal file
29
src/dynarmic/src/dynarmic/backend/block_range_information.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend {
|
||||
|
||||
template<typename ProgramCounterType>
|
||||
class BlockRangeInformation {
|
||||
public:
|
||||
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
|
||||
void ClearCache();
|
||||
ankerl::unordered_dense::set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);
|
||||
|
||||
private:
|
||||
boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend
|
||||
75
src/dynarmic/src/dynarmic/backend/exception_handler.h
Normal file
75
src/dynarmic/src/dynarmic/backend/exception_handler.h
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include <mcl/macro/architecture.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
class BlockOfCode;
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
namespace oaknut {
|
||||
class CodeBlock;
|
||||
} // namespace oaknut
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
class CodeBlock;
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
#endif
|
||||
|
||||
namespace Dynarmic::Backend {
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
struct FakeCall {
|
||||
u64 call_rip;
|
||||
u64 ret_rip;
|
||||
};
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
struct FakeCall {
|
||||
u64 call_pc;
|
||||
};
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
struct FakeCall {
|
||||
};
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
#endif
|
||||
|
||||
// Platform-independent facade over the host fault handler used for fastmem.
// The actual behaviour lives in a per-platform Impl (pimpl); each platform source file
// in this directory provides its own definition of Impl and of the members below.
class ExceptionHandler final {
|
||||
public:
|
||||
ExceptionHandler();
|
||||
~ExceptionHandler();
|
||||
|
||||
// Registers the code region to watch. The signature depends on the host architecture
// because each backend has its own code-block type.
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
void Register(X64::BlockOfCode& code);
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
void Register(oaknut::CodeBlock& mem, std::size_t mem_size);
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
void Register(RV64::CodeBlock& mem, std::size_t mem_size);
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
#endif
|
||||
|
||||
// Whether this handler can service fastmem faults; the answer is implementation-specific.
bool SupportsFastmem() const noexcept;
|
||||
// Installs the callback that maps a faulting host address to a FakeCall describing
// where execution should be redirected.
void SetFastmemCallback(std::function<FakeCall(u64)> cb);
|
||||
|
||||
private:
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/exception_handler.h"
|
||||
|
||||
namespace Dynarmic::Backend {
|
||||
|
||||
struct ExceptionHandler::Impl final {
|
||||
};
|
||||
|
||||
ExceptionHandler::ExceptionHandler() = default;
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
void ExceptionHandler::Register(X64::BlockOfCode&) {
|
||||
// Do nothing
|
||||
}
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
void ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) {
|
||||
// Do nothing
|
||||
}
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) {
|
||||
// Do nothing
|
||||
}
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
#endif
|
||||
|
||||
// Generic fallback: no fault handler is installed, so fastmem is never supported.
bool ExceptionHandler::SupportsFastmem() const noexcept {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Generic fallback: the callback is accepted and discarded.
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend
|
||||
296
src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp
Normal file
296
src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2019 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mach/mach.h>
|
||||
#include <mach/message.h>
|
||||
|
||||
#include <algorithm>
#include <cstring>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <utility>
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include <mcl/macro/architecture.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/backend/exception_handler.h"
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
|
||||
# include "dynarmic/backend/x64/block_of_code.h"
|
||||
# define mig_external extern "C"
|
||||
# include "dynarmic/backend/x64/mig/mach_exc_server.h"
|
||||
|
||||
# define THREAD_STATE x86_THREAD_STATE64
|
||||
# define THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
|
||||
|
||||
using dynarmic_thread_state_t = x86_thread_state64_t;
|
||||
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
|
||||
# include <oaknut/code_block.hpp>
|
||||
# define mig_external extern "C"
|
||||
# include "dynarmic/backend/arm64/mig/mach_exc_server.h"
|
||||
|
||||
# define THREAD_STATE ARM_THREAD_STATE64
|
||||
# define THREAD_STATE_COUNT ARM_THREAD_STATE64_COUNT
|
||||
|
||||
using dynarmic_thread_state_t = arm_thread_state64_t;
|
||||
|
||||
#endif
|
||||
|
||||
namespace Dynarmic::Backend {
|
||||
|
||||
namespace {
|
||||
|
||||
struct CodeBlockInfo {
|
||||
u64 code_begin, code_end;
|
||||
std::function<FakeCall(u64)> cb;
|
||||
};
|
||||
|
||||
struct MachMessage {
|
||||
mach_msg_header_t head;
|
||||
char data[2048]; ///< Arbitrary size
|
||||
};
|
||||
|
||||
class MachHandler final {
|
||||
public:
|
||||
MachHandler();
|
||||
~MachHandler();
|
||||
|
||||
kern_return_t HandleRequest(dynarmic_thread_state_t* thread_state);
|
||||
|
||||
void AddCodeBlock(CodeBlockInfo info);
|
||||
void RemoveCodeBlock(u64 rip);
|
||||
|
||||
private:
|
||||
auto FindCodeBlockInfo(u64 rip) {
|
||||
return std::find_if(code_block_infos.begin(), code_block_infos.end(), [&](const auto& x) { return x.code_begin <= rip && x.code_end > rip; });
|
||||
}
|
||||
|
||||
std::vector<CodeBlockInfo> code_block_infos;
|
||||
std::mutex code_block_infos_mutex;
|
||||
|
||||
std::thread thread;
|
||||
mach_port_t server_port;
|
||||
|
||||
void MessagePump();
|
||||
};
|
||||
|
||||
// Allocates the exception port, routes this task's EXC_BAD_ACCESS exceptions to it,
// and starts a detached thread running MessagePump to service them.
MachHandler::MachHandler() {
|
||||
// Asserts that a Mach call returned KERN_SUCCESS; the failing expression text is included in the message.
#define KCHECK(x) ASSERT_MSG((x) == KERN_SUCCESS, "dynarmic: macOS MachHandler: init failure at {}", #x)
|
||||
|
||||
// Create a receive right, add a send right to the same port, then register it as the task's
// EXC_BAD_ACCESS handler using the state-carrying behaviour and the architecture's thread-state flavor.
KCHECK(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server_port));
|
||||
KCHECK(mach_port_insert_right(mach_task_self(), server_port, server_port, MACH_MSG_TYPE_MAKE_SEND));
|
||||
KCHECK(task_set_exception_ports(mach_task_self(), EXC_MASK_BAD_ACCESS, server_port, EXCEPTION_STATE | MACH_EXCEPTION_CODES, THREAD_STATE));
|
||||
|
||||
// The below doesn't actually work, and I'm not sure why; since this doesn't work we'll have a spurious error message upon shutdown.
|
||||
mach_port_t prev;
|
||||
KCHECK(mach_port_request_notification(mach_task_self(), server_port, MACH_NOTIFY_PORT_DESTROYED, 0, server_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &prev));
|
||||
|
||||
#undef KCHECK
|
||||
|
||||
// The pump thread is detached, so it is never joined by the destructor.
thread = std::thread(&MachHandler::MessagePump, this);
|
||||
thread.detach();
|
||||
}
|
||||
|
||||
// Releases this task's reference to the exception port.
// NOTE(review): the detached MessagePump thread is not stopped here; per the constructor's
// comment a spurious error message on shutdown is expected.
MachHandler::~MachHandler() {
|
||||
mach_port_deallocate(mach_task_self(), server_port);
|
||||
}
|
||||
|
||||
void MachHandler::MessagePump() {
|
||||
mach_msg_return_t mr;
|
||||
MachMessage request;
|
||||
MachMessage reply;
|
||||
|
||||
while (true) {
|
||||
mr = mach_msg(&request.head, MACH_RCV_MSG | MACH_RCV_LARGE, 0, sizeof(request), server_port, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
|
||||
if (mr != MACH_MSG_SUCCESS) {
|
||||
fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to receive mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
|
||||
return;
|
||||
}
|
||||
|
||||
if (!mach_exc_server(&request.head, &reply.head)) {
|
||||
fmt::print(stderr, "dynarmic: macOS MachHandler: Unexpected mach message\n");
|
||||
return;
|
||||
}
|
||||
|
||||
mr = mach_msg(&reply.head, MACH_SEND_MSG, reply.head.msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
|
||||
if (mr != MACH_MSG_SUCCESS) {
|
||||
fmt::print(stderr, "dynarmic: macOS MachHandler: Failed to send mach message. error: {:#08x} ({})\n", mr, mach_error_string(mr));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
// x86_64: looks up the code region containing the faulting RIP and, if found, rewrites the
// thread state to simulate a call: pushes fc.ret_rip onto the thread's stack and sets RIP to
// fc.call_rip. Returns KERN_FAILURE (after logging) when no region matches.
kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);

    const auto info = FindCodeBlockInfo(ts->__rip);
    if (info != code_block_infos.end()) {
        const FakeCall fc = info->cb(ts->__rip);

        // Simulated `call`: make room for the return address, store it, then jump.
        ts->__rsp -= sizeof(u64);
        u64* const return_slot = mcl::bit_cast<u64*>(ts->__rsp);
        *return_slot = fc.ret_rip;
        ts->__rip = fc.call_rip;

        return KERN_SUCCESS;
    }

    fmt::print(stderr, "Unhandled EXC_BAD_ACCESS at rip {:#016x}\n", ts->__rip);
    return KERN_FAILURE;
}
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
// arm64: looks up the code region containing the faulting PC and, if found, redirects the
// thread to fc.call_pc. Returns KERN_FAILURE (after logging) when no region matches.
kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);

    const auto info = FindCodeBlockInfo(ts->__pc);
    if (info != code_block_infos.end()) {
        const FakeCall fc = info->cb(ts->__pc);

        // TODO: Sign with ptrauth_sign_unauthenticated if pointer authentication is enabled.
        ts->__pc = fc.call_pc;

        return KERN_SUCCESS;
    }

    fmt::print(stderr, "Unhandled EXC_BAD_ACCESS at pc {:#016x}\n", ts->__pc);
    return KERN_FAILURE;
}
|
||||
#endif
|
||||
|
||||
// Registers a code region. If a region containing `cbi.code_begin` is already registered it is
// replaced. `cbi` is taken by value and moved into storage so the contained std::function
// callback is not copied a second time.
void MachHandler::AddCodeBlock(CodeBlockInfo cbi) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
    if (auto iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) {
        code_block_infos.erase(iter);
    }
    code_block_infos.push_back(std::move(cbi));
}
|
||||
|
||||
// Unregisters the code region containing `rip`, if any; otherwise does nothing.
void MachHandler::RemoveCodeBlock(u64 rip) {
    std::lock_guard<std::mutex> guard(code_block_infos_mutex);
    if (const auto found = FindCodeBlockInfo(rip); found != code_block_infos.end()) {
        code_block_infos.erase(found);
    }
}
|
||||
|
||||
std::mutex handler_lock;
|
||||
std::optional<MachHandler> mach_handler;
|
||||
|
||||
void RegisterHandler() {
|
||||
std::lock_guard<std::mutex> guard(handler_lock);
|
||||
if (!mach_handler) {
|
||||
mach_handler.emplace();
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// Entry point for the plain "raise" exception message. The port is registered with the
// state-carrying behaviour, so this message is not expected: log and report failure.
mig_external kern_return_t catch_mach_exception_raise(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t) {
|
||||
fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise\n");
|
||||
return KERN_FAILURE;
|
||||
}
|
||||
|
||||
// Entry point for the "raise_state_identity" exception message. The port is registered with the
// state-only behaviour, so this message is not expected: log and report failure.
mig_external kern_return_t catch_mach_exception_raise_state_identity(mach_port_t, mach_port_t, mach_port_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, int*, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t*) {
|
||||
fmt::print(stderr, "dynarmic: Unexpected mach message: mach_exception_raise_state_identity\n");
|
||||
return KERN_FAILURE;
|
||||
}
|
||||
|
||||
// Entry point for the "raise_state" exception message, the one this handler is registered for.
// Validates the arguments, copies the incoming thread state into an aligned local, lets the
// MachHandler rewrite it, and copies the result to the outgoing state.
mig_external kern_return_t catch_mach_exception_raise_state(
    mach_port_t /*exception_port*/,
    exception_type_t exception,
    const mach_exception_data_t /*code*/,  // code[0] is as per kern_return.h, code[1] is rip.
    mach_msg_type_number_t /*codeCnt*/,
    int* flavor,
    const thread_state_t old_state,
    mach_msg_type_number_t old_stateCnt,
    thread_state_t new_state,
    mach_msg_type_number_t* new_stateCnt) {
    if (flavor == nullptr || new_stateCnt == nullptr) {
        fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Invalid arguments.\n");
        return KERN_INVALID_ARGUMENT;
    }

    const bool flavor_matches = *flavor == THREAD_STATE;
    const bool counts_match = old_stateCnt == THREAD_STATE_COUNT && *new_stateCnt >= THREAD_STATE_COUNT;
    if (!flavor_matches || !counts_match) {
        fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Unexpected flavor.\n");
        return KERN_INVALID_ARGUMENT;
    }

    if (exception != EXC_BAD_ACCESS) {
        fmt::print(stderr, "dynarmic: catch_mach_exception_raise_state: Unexpected exception type.\n");
        return KERN_FAILURE;
    }

    // The input/output pointers are not necessarily 8-byte aligned, so work on a local copy.
    dynarmic_thread_state_t state_copy;
    std::memcpy(&state_copy, old_state, sizeof(state_copy));

    const kern_return_t result = mach_handler->HandleRequest(&state_copy);

    // The state is written back regardless of the handler's result.
    std::memcpy(new_state, &state_copy, sizeof(state_copy));
    *new_stateCnt = THREAD_STATE_COUNT;
    return result;
}
|
||||
|
||||
struct ExceptionHandler::Impl final {
|
||||
Impl(u64 code_begin_, u64 code_end_)
|
||||
: code_begin(code_begin_)
|
||||
, code_end(code_end_) {
|
||||
RegisterHandler();
|
||||
}
|
||||
|
||||
void SetCallback(std::function<FakeCall(u64)> cb) {
|
||||
CodeBlockInfo cbi;
|
||||
cbi.code_begin = code_begin;
|
||||
cbi.code_end = code_end;
|
||||
cbi.cb = cb;
|
||||
mach_handler->AddCodeBlock(cbi);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
mach_handler->RemoveCodeBlock(code_begin);
|
||||
}
|
||||
|
||||
private:
|
||||
u64 code_begin, code_end;
|
||||
};
|
||||
|
||||
ExceptionHandler::ExceptionHandler() = default;
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
// x86_64: watches the whole code buffer of `code`, from getCode() for GetTotalCodeSize() bytes.
void ExceptionHandler::Register(X64::BlockOfCode& code) {
    const u64 region_begin = mcl::bit_cast<u64>(code.getCode());
    const u64 region_end = region_begin + code.GetTotalCodeSize();

    impl = std::make_unique<Impl>(region_begin, region_end);
}
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
// arm64: watches `size` bytes starting at mem.ptr().
void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
    const u64 region_begin = mcl::bit_cast<u64>(mem.ptr());
    const u64 region_end = region_begin + size;

    impl = std::make_unique<Impl>(region_begin, region_end);
}
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
#endif
|
||||
|
||||
bool ExceptionHandler::SupportsFastmem() const noexcept {
|
||||
return static_cast<bool>(impl);
|
||||
}
|
||||
|
||||
// Forwards the fastmem callback to the implementation. Register() must have been called first
// (see SupportsFastmem). The by-value callback is moved on rather than copied.
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
    impl->SetCallback(std::move(cb));
}
|
||||
|
||||
} // namespace Dynarmic::Backend
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2023 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/macro/architecture.hpp>
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
# include "dynarmic/backend/x64/mig/mach_exc_server.c"
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
# include "dynarmic/backend/arm64/mig/mach_exc_server.c"
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
#endif
|
||||
342
src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp
Normal file
342
src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2019 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/exception_handler.h"
|
||||
|
||||
#ifdef __APPLE__
|
||||
# include <signal.h>
|
||||
# include <sys/ucontext.h>
|
||||
#else
|
||||
# include <signal.h>
|
||||
# ifndef __OpenBSD__
|
||||
# include <ucontext.h>
|
||||
# endif
|
||||
# ifdef __sun__
|
||||
# include <sys/regset.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/bit_cast.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
# include "dynarmic/backend/x64/block_of_code.h"
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
# include <oaknut/code_block.hpp>
|
||||
|
||||
# include "dynarmic/backend/arm64/abi.h"
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
# include "dynarmic/backend/riscv64/code_block.h"
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
#endif
|
||||
|
||||
namespace Dynarmic::Backend {
|
||||
|
||||
namespace {
|
||||
|
||||
// One registered region of host code together with the callback used for faults inside it.
struct CodeBlockInfo {
|
||||
// Host address range; lookups treat it as half-open: code_begin <= pc < code_end.
u64 code_begin, code_end;
|
||||
// Invoked with the faulting host program counter; returns the FakeCall the signal handler applies.
std::function<FakeCall(u64)> cb;
|
||||
};
|
||||
|
||||
class SigHandler {
|
||||
public:
|
||||
SigHandler();
|
||||
~SigHandler();
|
||||
|
||||
void AddCodeBlock(CodeBlockInfo info);
|
||||
void RemoveCodeBlock(u64 host_pc);
|
||||
|
||||
bool SupportsFastmem() const { return supports_fast_mem; }
|
||||
|
||||
private:
|
||||
auto FindCodeBlockInfo(u64 host_pc) {
|
||||
return std::find_if(code_block_infos.begin(), code_block_infos.end(), [&](const auto& x) { return x.code_begin <= host_pc && x.code_end > host_pc; });
|
||||
}
|
||||
|
||||
bool supports_fast_mem = true;
|
||||
|
||||
void* signal_stack_memory = nullptr;
|
||||
|
||||
std::vector<CodeBlockInfo> code_block_infos;
|
||||
std::mutex code_block_infos_mutex;
|
||||
|
||||
struct sigaction old_sa_segv;
|
||||
struct sigaction old_sa_bus;
|
||||
|
||||
static void SigAction(int sig, siginfo_t* info, void* raw_context);
|
||||
};
|
||||
|
||||
std::mutex handler_lock;
|
||||
std::optional<SigHandler> sig_handler;
|
||||
|
||||
void RegisterHandler() {
|
||||
std::lock_guard<std::mutex> guard(handler_lock);
|
||||
if (!sig_handler) {
|
||||
sig_handler.emplace();
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates an alternate signal stack and installs SigAction for SIGSEGV (and SIGBUS on Apple
// platforms). Any failure is reported on stderr and disables fastmem support instead of aborting.
SigHandler::SigHandler() {
    const size_t signal_stack_size = std::max<size_t>(SIGSTKSZ, 2 * 1024 * 1024);

    signal_stack_memory = std::malloc(signal_stack_size);
    if (signal_stack_memory == nullptr) {
        // Never hand a null stack to sigaltstack: the first fault would then run on an invalid stack.
        fmt::print(stderr, "dynarmic: POSIX SigHandler: could not allocate signal stack\n");
        supports_fast_mem = false;
        return;
    }

    stack_t signal_stack;
    signal_stack.ss_sp = signal_stack_memory;
    signal_stack.ss_size = signal_stack_size;
    signal_stack.ss_flags = 0;
    if (sigaltstack(&signal_stack, nullptr) != 0) {
        fmt::print(stderr, "dynarmic: POSIX SigHandler: init failure at sigaltstack\n");
        supports_fast_mem = false;
        return;
    }

    // Value-initialise so that platform-specific fields not set below are zero rather than indeterminate.
    struct sigaction sa {};
    sa.sa_handler = nullptr;
    sa.sa_sigaction = &SigHandler::SigAction;
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
    sigemptyset(&sa.sa_mask);
    if (sigaction(SIGSEGV, &sa, &old_sa_segv) != 0) {
        fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGSEGV handler\n");
        supports_fast_mem = false;
        return;
    }
#ifdef __APPLE__
    if (sigaction(SIGBUS, &sa, &old_sa_bus) != 0) {
        fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGBUS handler\n");
        supports_fast_mem = false;
        return;
    }
#endif
}
|
||||
|
||||
// Frees the buffer allocated for the alternate signal stack.
// The signal dispositions installed by the constructor are not restored here.
SigHandler::~SigHandler() {
|
||||
std::free(signal_stack_memory);
|
||||
}
|
||||
|
||||
// Adds cbi to the table. An existing entry whose range contains cbi.code_begin is removed first,
// so re-registering the same block replaces its callback.
void SigHandler::AddCodeBlock(CodeBlockInfo cbi) {
    const std::lock_guard<std::mutex> guard(code_block_infos_mutex);

    const auto existing = FindCodeBlockInfo(cbi.code_begin);
    if (existing != code_block_infos.end()) {
        code_block_infos.erase(existing);
    }

    code_block_infos.push_back(cbi);
}
|
||||
|
||||
// Removes the entry whose range contains host_pc; does nothing when no entry matches.
void SigHandler::RemoveCodeBlock(u64 host_pc) {
    const std::lock_guard<std::mutex> guard(code_block_infos_mutex);

    if (const auto entry = FindCodeBlockInfo(host_pc); entry != code_block_infos.end()) {
        code_block_infos.erase(entry);
    }
}
|
||||
|
||||
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
|
||||
ASSERT(sig == SIGSEGV || sig == SIGBUS);
|
||||
|
||||
#ifndef MCL_ARCHITECTURE_RISCV
|
||||
ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context);
|
||||
#ifndef __OpenBSD__
|
||||
auto& mctx = ucontext->uc_mcontext;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
|
||||
# if defined(__APPLE__)
|
||||
# define CTX_RIP (mctx->__ss.__rip)
|
||||
# define CTX_RSP (mctx->__ss.__rsp)
|
||||
# elif defined(__linux__)
|
||||
# define CTX_RIP (mctx.gregs[REG_RIP])
|
||||
# define CTX_RSP (mctx.gregs[REG_RSP])
|
||||
# elif defined(__FreeBSD__)
|
||||
# define CTX_RIP (mctx.mc_rip)
|
||||
# define CTX_RSP (mctx.mc_rsp)
|
||||
# elif defined(__NetBSD__)
|
||||
# define CTX_RIP (mctx.__gregs[_REG_RIP])
|
||||
# define CTX_RSP (mctx.__gregs[_REG_RSP])
|
||||
# elif defined(__OpenBSD__)
|
||||
# define CTX_RIP (ucontext->sc_rip)
|
||||
# define CTX_RSP (ucontext->sc_rsp)
|
||||
# elif defined(__sun__)
|
||||
# define CTX_RIP (mctx.gregs[REG_RIP])
|
||||
# define CTX_RSP (mctx.gregs[REG_RSP])
|
||||
# else
|
||||
# error "Unknown platform"
|
||||
# endif
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(sig_handler->code_block_infos_mutex);
|
||||
|
||||
const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP);
|
||||
if (iter != sig_handler->code_block_infos.end()) {
|
||||
FakeCall fc = iter->cb(CTX_RIP);
|
||||
|
||||
CTX_RSP -= sizeof(u64);
|
||||
*mcl::bit_cast<u64*>(CTX_RSP) = fc.ret_rip;
|
||||
CTX_RIP = fc.call_rip;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
|
||||
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
|
||||
# if defined(__APPLE__)
|
||||
# define CTX_PC (mctx->__ss.__pc)
|
||||
# define CTX_SP (mctx->__ss.__sp)
|
||||
# define CTX_LR (mctx->__ss.__lr)
|
||||
# define CTX_X(i) (mctx->__ss.__x[i])
|
||||
# define CTX_Q(i) (mctx->__ns.__v[i])
|
||||
# elif defined(__linux__)
|
||||
# define CTX_PC (mctx.pc)
|
||||
# define CTX_SP (mctx.sp)
|
||||
# define CTX_LR (mctx.regs[30])
|
||||
# define CTX_X(i) (mctx.regs[i])
|
||||
# define CTX_Q(i) (fpctx->vregs[i])
|
||||
[[maybe_unused]] const auto fpctx = [&mctx] {
|
||||
_aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved;
|
||||
while (header->magic != FPSIMD_MAGIC) {
|
||||
ASSERT(header->magic && header->size);
|
||||
header = (_aarch64_ctx*)((char*)header + header->size);
|
||||
}
|
||||
return (fpsimd_context*)header;
|
||||
}();
|
||||
# elif defined(__FreeBSD__)
|
||||
# define CTX_PC (mctx.mc_gpregs.gp_elr)
|
||||
# define CTX_SP (mctx.mc_gpregs.gp_sp)
|
||||
# define CTX_LR (mctx.mc_gpregs.gp_lr)
|
||||
# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
|
||||
# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
|
||||
# elif defined(__NetBSD__)
|
||||
# define CTX_PC (mctx.mc_gpregs.gp_elr)
|
||||
# define CTX_SP (mctx.mc_gpregs.gp_sp)
|
||||
# define CTX_LR (mctx.mc_gpregs.gp_lr)
|
||||
# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
|
||||
# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
|
||||
# elif defined(__OpenBSD__)
|
||||
# define CTX_PC (ucontext->sc_elr)
|
||||
# define CTX_SP (ucontext->sc_sp)
|
||||
# define CTX_LR (ucontext->sc_lr)
|
||||
# define CTX_X(i) (ucontext->sc_x[i])
|
||||
# define CTX_Q(i) (ucontext->sc_q[i])
|
||||
# else
|
||||
# error "Unknown platform"
|
||||
# endif
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(sig_handler->code_block_infos_mutex);
|
||||
|
||||
const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC);
|
||||
if (iter != sig_handler->code_block_infos.end()) {
|
||||
FakeCall fc = iter->cb(CTX_PC);
|
||||
|
||||
CTX_PC = fc.call_pc;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_PC);
|
||||
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
|
||||
#else
|
||||
|
||||
# error "Invalid architecture"
|
||||
|
||||
#endif
|
||||
|
||||
struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler->old_sa_segv : &sig_handler->old_sa_bus;
|
||||
if (retry_sa->sa_flags & SA_SIGINFO) {
|
||||
retry_sa->sa_sigaction(sig, info, raw_context);
|
||||
return;
|
||||
}
|
||||
if (retry_sa->sa_handler == SIG_DFL) {
|
||||
signal(sig, SIG_DFL);
|
||||
return;
|
||||
}
|
||||
if (retry_sa->sa_handler == SIG_IGN) {
|
||||
return;
|
||||
}
|
||||
retry_sa->sa_handler(sig);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
struct ExceptionHandler::Impl final {
|
||||
Impl(u64 code_begin_, u64 code_end_)
|
||||
: code_begin(code_begin_)
|
||||
, code_end(code_end_) {
|
||||
RegisterHandler();
|
||||
}
|
||||
|
||||
void SetCallback(std::function<FakeCall(u64)> cb) {
|
||||
CodeBlockInfo cbi;
|
||||
cbi.code_begin = code_begin;
|
||||
cbi.code_end = code_end;
|
||||
cbi.cb = cb;
|
||||
sig_handler->AddCodeBlock(cbi);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
sig_handler->RemoveCodeBlock(code_begin);
|
||||
}
|
||||
|
||||
private:
|
||||
u64 code_begin, code_end;
|
||||
};
|
||||
|
||||
ExceptionHandler::ExceptionHandler() = default;
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
// Each variant computes the host address range of the emitted code and creates the Impl for it,
// replacing any Impl created by an earlier call.
#if defined(MCL_ARCHITECTURE_X86_64)
void ExceptionHandler::Register(X64::BlockOfCode& code) {
    const u64 range_start = mcl::bit_cast<u64>(code.getCode());
    const u64 range_end = range_start + code.GetTotalCodeSize();
    impl = std::make_unique<Impl>(range_start, range_end);
}
#elif defined(MCL_ARCHITECTURE_ARM64)
void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
    const u64 range_start = mcl::bit_cast<u64>(mem.ptr());
    const u64 range_end = range_start + size;
    impl = std::make_unique<Impl>(range_start, range_end);
}
#elif defined(MCL_ARCHITECTURE_RISCV)
void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) {
    const u64 range_start = mcl::bit_cast<u64>(mem.ptr<u64>());
    const u64 range_end = range_start + size;
    impl = std::make_unique<Impl>(range_start, range_end);
}
#else
#    error "Invalid architecture"
#endif
|
||||
|
||||
bool ExceptionHandler::SupportsFastmem() const noexcept {
|
||||
return static_cast<bool>(impl) && sig_handler->SupportsFastmem();
|
||||
}
|
||||
|
||||
// Forwards the fastmem fault callback to the implementation object.
// impl is dereferenced without a null check, so it must already have been created.
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)> cb) {
|
||||
impl->SetCallback(cb);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2023 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/macro/architecture.hpp>
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
# include "dynarmic/backend/x64/exception_handler_windows.cpp"
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
# include "dynarmic/backend/exception_handler_generic.cpp"
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
#endif
|
||||
148
src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp
Normal file
148
src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_address_space.h"
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/stack_layout.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A32/translate/a32_translate.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
// Copies the configuration, creates a code block of conf.code_cache_size bytes, points the
// assembler at that block, and emits the prelude at its start.
A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
|
||||
: conf(conf)
|
||||
, cb(conf.code_cache_size)
|
||||
, as(cb.ptr<u8*>(), conf.code_cache_size) {
|
||||
EmitPrelude();
|
||||
}
|
||||
|
||||
// Translates the guest code at `descriptor` into IR and runs the optimisation passes enabled in
// the configuration. The polyfill and verification passes always run.
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
    const bool eliminate_get_set = conf.HasOptimization(OptimizationFlag::GetSetElimination);
    const bool propagate_constants = conf.HasOptimization(OptimizationFlag::ConstProp);

    IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});

    Optimization::PolyfillPass(ir_block, {});

    if (eliminate_get_set) {
        Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
        Optimization::DeadCodeElimination(ir_block);
    }

    if (propagate_constants) {
        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
        Optimization::ConstantPropagation(ir_block);
        Optimization::DeadCodeElimination(ir_block);
    }

    Optimization::VerificationPass(ir_block);

    return ir_block;
}
|
||||
|
||||
// Looks up the entry point of an already emitted block; returns nullptr when there is none.
CodePtr A32AddressSpace::Get(IR::LocationDescriptor descriptor) {
    const auto found = block_entries.find(descriptor.Value());
    if (found == block_entries.end()) {
        return nullptr;
    }
    return found->second;
}
|
||||
|
||||
// Returns the cached entry point for `descriptor`, translating and emitting the block first when
// it is not cached yet.
CodePtr A32AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
    CodePtr cached = Get(descriptor);
    if (cached) {
        return cached;
    }

    const EmittedBlockInfo block_info = Emit(GenerateIR(descriptor));

    const auto key = descriptor.Value();
    block_infos.insert_or_assign(key, block_info);
    block_entries.insert_or_assign(key, block_info.entry_point);
    return block_info.entry_point;
}
|
||||
|
||||
// Forgets every emitted block and rewinds the assembler to just after the prelude.
void A32AddressSpace::ClearCache() {
    block_infos.clear();
    block_entries.clear();
    SetCursorPtr(prelude_info.end_of_prelude);
}
|
||||
|
||||
void A32AddressSpace::EmitPrelude() {
|
||||
using namespace biscuit;
|
||||
prelude_info.run_code = GetCursorPtr<PreludeInfo::RunCodeFuncType>();
|
||||
|
||||
// TODO: Minimize this.
|
||||
as.ADDI(sp, sp, -(64 * 8 + static_cast<int32_t>(sizeof(StackLayout))));
|
||||
for (u32 i = 1; i < 32; i += 1) {
|
||||
if (GPR{i} == sp || GPR{i} == tp)
|
||||
continue;
|
||||
as.SD(GPR{i}, i * 8 + static_cast<int32_t>(sizeof(StackLayout)), sp);
|
||||
}
|
||||
for (u32 i = 0; i < 32; i += 1) {
|
||||
as.FSD(FPR{i}, (32 + i) * 8 + static_cast<int32_t>(sizeof(StackLayout)), sp);
|
||||
}
|
||||
|
||||
as.MV(Xstate, a1);
|
||||
as.MV(Xhalt, a2);
|
||||
as.JR(a0);
|
||||
|
||||
prelude_info.return_from_run_code = GetCursorPtr<CodePtr>();
|
||||
for (u32 i = 1; i < 32; i += 1) {
|
||||
if (GPR{i} == sp || GPR{i} == tp)
|
||||
continue;
|
||||
as.LD(GPR{i}, i * 8 + static_cast<int32_t>(sizeof(StackLayout)), sp);
|
||||
}
|
||||
for (u32 i = 0; i < 32; i += 1) {
|
||||
as.FLD(FPR{i}, (32 + i) * 8 + static_cast<int32_t>(sizeof(StackLayout)), sp);
|
||||
}
|
||||
as.ADDI(sp, sp, (64 * 8 + static_cast<int32_t>(sizeof(StackLayout))));
|
||||
as.JALR(ra);
|
||||
|
||||
prelude_info.end_of_prelude = GetCursorPtr<CodePtr>();
|
||||
}
|
||||
|
||||
// Moves the assembler cursor to `ptr`, which must not lie before the start of the buffer.
void A32AddressSpace::SetCursorPtr(CodePtr ptr) {
    const ptrdiff_t distance_from_start = ptr - GetMemPtr<CodePtr>();
    ASSERT(distance_from_start >= 0);
    as.RewindBuffer(distance_from_start);
}
|
||||
|
||||
size_t A32AddressSpace::GetRemainingSize() {
|
||||
return conf.code_cache_size - (GetCursorPtr<sptr>() - GetMemPtr<sptr>());
|
||||
}
|
||||
|
||||
// Emits host code for `block` and links it. When less than 1 MiB of cache remains, the whole
// cache is cleared first.
EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
    constexpr size_t minimum_headroom = 1024 * 1024;
    if (GetRemainingSize() < minimum_headroom) {
        ClearCache();
    }

    EmittedBlockInfo emitted = EmitRV64(as, std::move(block), {
                                                                  .enable_cycle_counting = conf.enable_cycle_counting,
                                                                  .always_little_endian = conf.always_little_endian,
                                                              });
    Link(emitted);

    return emitted;
}
|
||||
|
||||
// Patches every relocation recorded for the block. Each one is a 4-byte slot at
// entry_point + ptr_offset that is overwritten with a jump to the relocation's target.
void A32AddressSpace::Link(EmittedBlockInfo& block_info) {
    using namespace biscuit;

    u8* const block_start = reinterpret_cast<u8*>(block_info.entry_point);

    for (auto [ptr_offset, target] : block_info.relocations) {
        Assembler patcher(block_start + ptr_offset, 4);

        switch (target) {
        case LinkTarget::ReturnFromRunCode: {
            const std::ptrdiff_t displacement = prelude_info.return_from_run_code - reinterpret_cast<CodePtr>(patcher.GetCursorPointer());
            patcher.J(displacement);
            break;
        }
        default:
            ASSERT_FALSE("Invalid relocation target");
        }
    }
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/code_block.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/interface/halt_reason.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
struct A32JitState;
|
||||
|
||||
class A32AddressSpace final {
|
||||
public:
|
||||
explicit A32AddressSpace(const A32::UserConfig& conf);
|
||||
|
||||
IR::Block GenerateIR(IR::LocationDescriptor) const;
|
||||
|
||||
CodePtr Get(IR::LocationDescriptor descriptor);
|
||||
|
||||
CodePtr GetOrEmit(IR::LocationDescriptor descriptor);
|
||||
|
||||
void ClearCache();
|
||||
|
||||
private:
|
||||
friend class A32Core;
|
||||
|
||||
void EmitPrelude();
|
||||
|
||||
template<typename T>
|
||||
T GetMemPtr() {
|
||||
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
|
||||
return reinterpret_cast<T>(as.GetBufferPointer(0));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T GetMemPtr() const {
|
||||
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
|
||||
return reinterpret_cast<const T>(as.GetBufferPointer(0));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T GetCursorPtr() {
|
||||
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
|
||||
return reinterpret_cast<T>(as.GetCursorPointer());
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T GetCursorPtr() const {
|
||||
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
|
||||
return reinterpret_cast<const T>(as.GetCursorPointer());
|
||||
}
|
||||
|
||||
void SetCursorPtr(CodePtr ptr);
|
||||
|
||||
size_t GetRemainingSize();
|
||||
EmittedBlockInfo Emit(IR::Block ir_block);
|
||||
void Link(EmittedBlockInfo& block);
|
||||
|
||||
const A32::UserConfig conf;
|
||||
|
||||
CodeBlock cb;
|
||||
biscuit::Assembler as;
|
||||
|
||||
ankerl::unordered_dense::map<u64, CodePtr> block_entries;
|
||||
ankerl::unordered_dense::map<u64, EmittedBlockInfo> block_infos;
|
||||
|
||||
struct PreludeInfo {
|
||||
CodePtr end_of_prelude;
|
||||
|
||||
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A32JitState* context, volatile u32* halt_reason);
|
||||
RunCodeFuncType run_code;
|
||||
CodePtr return_from_run_code;
|
||||
} prelude_info;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
24
src/dynarmic/src/dynarmic/backend/riscv64/a32_core.h
Normal file
24
src/dynarmic/src/dynarmic/backend/riscv64/a32_core.h
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_address_space.h"
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
class A32Core final {
|
||||
public:
|
||||
explicit A32Core(const A32::UserConfig&) {}
|
||||
|
||||
HaltReason Run(A32AddressSpace& process, A32JitState& thread_ctx, volatile u32* halt_reason) {
|
||||
const auto location_descriptor = thread_ctx.GetLocationDescriptor();
|
||||
const auto entry_point = process.GetOrEmit(location_descriptor);
|
||||
return process.prelude_info.run_code(entry_point, &thread_ctx, halt_reason);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
220
src/dynarmic/src/dynarmic/backend/riscv64/a32_interface.cpp
Normal file
220
src/dynarmic/src/dynarmic/backend/riscv64/a32_interface.cpp
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/scope_exit.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_address_space.h"
|
||||
#include "dynarmic/backend/riscv64/a32_core.h"
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/common/atomic.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
|
||||
namespace Dynarmic::A32 {
|
||||
|
||||
using namespace Backend::RV64;
|
||||
|
||||
struct Jit::Impl final {
|
||||
Impl(Jit* jit_interface, A32::UserConfig conf)
|
||||
: jit_interface(jit_interface)
|
||||
, conf(conf)
|
||||
, current_address_space(conf)
|
||||
, core(conf) {}
|
||||
|
||||
HaltReason Run() {
|
||||
ASSERT(!jit_interface->is_executing);
|
||||
jit_interface->is_executing = true;
|
||||
SCOPE_EXIT {
|
||||
jit_interface->is_executing = false;
|
||||
};
|
||||
|
||||
HaltReason hr = core.Run(current_address_space, current_state, &halt_reason);
|
||||
|
||||
RequestCacheInvalidation();
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
HaltReason Step() {
|
||||
ASSERT(!jit_interface->is_executing);
|
||||
jit_interface->is_executing = true;
|
||||
SCOPE_EXIT {
|
||||
jit_interface->is_executing = false;
|
||||
};
|
||||
|
||||
UNIMPLEMENTED();
|
||||
|
||||
RequestCacheInvalidation();
|
||||
|
||||
return HaltReason{};
|
||||
}
|
||||
|
||||
void ClearCache() {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
invalidate_entire_cache = true;
|
||||
HaltExecution(HaltReason::CacheInvalidation);
|
||||
}
|
||||
|
||||
void InvalidateCacheRange(u32 start_address, size_t length) {
|
||||
std::unique_lock lock{invalidation_mutex};
|
||||
invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
|
||||
HaltExecution(HaltReason::CacheInvalidation);
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
current_state = {};
|
||||
}
|
||||
|
||||
void HaltExecution(HaltReason hr) {
|
||||
Atomic::Or(&halt_reason, ~static_cast<u32>(hr));
|
||||
}
|
||||
|
||||
void ClearHalt(HaltReason hr) {
|
||||
Atomic::And(&halt_reason, ~static_cast<u32>(hr));
|
||||
}
|
||||
|
||||
std::array<u32, 16>& Regs() {
|
||||
return current_state.regs;
|
||||
}
|
||||
|
||||
const std::array<u32, 16>& Regs() const {
|
||||
return current_state.regs;
|
||||
}
|
||||
|
||||
std::array<u32, 64>& ExtRegs() {
|
||||
return current_state.ext_regs;
|
||||
}
|
||||
|
||||
const std::array<u32, 64>& ExtRegs() const {
|
||||
return current_state.ext_regs;
|
||||
}
|
||||
|
||||
u32 Cpsr() const {
|
||||
return current_state.Cpsr();
|
||||
}
|
||||
|
||||
void SetCpsr(u32 value) {
|
||||
current_state.SetCpsr(value);
|
||||
}
|
||||
|
||||
u32 Fpscr() const {
|
||||
return current_state.Fpscr();
|
||||
}
|
||||
|
||||
void SetFpscr(u32 value) {
|
||||
current_state.SetFpscr(value);
|
||||
}
|
||||
|
||||
void ClearExclusiveState() {
|
||||
current_state.exclusive_state = false;
|
||||
}
|
||||
|
||||
void DumpDisassembly() const {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
private:
|
||||
void RequestCacheInvalidation() {
|
||||
// ASSERT_FALSE("Unimplemented");
|
||||
|
||||
invalidate_entire_cache = false;
|
||||
invalid_cache_ranges.clear();
|
||||
}
|
||||
|
||||
Jit* jit_interface;
|
||||
A32::UserConfig conf;
|
||||
A32JitState current_state{};
|
||||
A32AddressSpace current_address_space;
|
||||
A32Core core;
|
||||
|
||||
volatile u32 halt_reason = 0;
|
||||
|
||||
std::mutex invalidation_mutex;
|
||||
boost::icl::interval_set<u32> invalid_cache_ranges;
|
||||
bool invalidate_entire_cache = false;
|
||||
};
|
||||
|
||||
Jit::Jit(UserConfig conf)
|
||||
: impl(std::make_unique<Impl>(this, conf)) {}
|
||||
|
||||
Jit::~Jit() = default;
|
||||
|
||||
HaltReason Jit::Run() {
|
||||
return impl->Run();
|
||||
}
|
||||
|
||||
HaltReason Jit::Step() {
|
||||
return impl->Step();
|
||||
}
|
||||
|
||||
void Jit::ClearCache() {
|
||||
impl->ClearCache();
|
||||
}
|
||||
|
||||
void Jit::InvalidateCacheRange(u32 start_address, std::size_t length) {
|
||||
impl->InvalidateCacheRange(start_address, length);
|
||||
}
|
||||
|
||||
void Jit::Reset() {
|
||||
impl->Reset();
|
||||
}
|
||||
|
||||
void Jit::HaltExecution(HaltReason hr) {
|
||||
impl->HaltExecution(hr);
|
||||
}
|
||||
|
||||
void Jit::ClearHalt(HaltReason hr) {
|
||||
impl->ClearHalt(hr);
|
||||
}
|
||||
|
||||
std::array<u32, 16>& Jit::Regs() {
|
||||
return impl->Regs();
|
||||
}
|
||||
|
||||
const std::array<u32, 16>& Jit::Regs() const {
|
||||
return impl->Regs();
|
||||
}
|
||||
|
||||
std::array<u32, 64>& Jit::ExtRegs() {
|
||||
return impl->ExtRegs();
|
||||
}
|
||||
|
||||
const std::array<u32, 64>& Jit::ExtRegs() const {
|
||||
return impl->ExtRegs();
|
||||
}
|
||||
|
||||
u32 Jit::Cpsr() const {
|
||||
return impl->Cpsr();
|
||||
}
|
||||
|
||||
void Jit::SetCpsr(u32 value) {
|
||||
impl->SetCpsr(value);
|
||||
}
|
||||
|
||||
u32 Jit::Fpscr() const {
|
||||
return impl->Fpscr();
|
||||
}
|
||||
|
||||
void Jit::SetFpscr(u32 value) {
|
||||
impl->SetFpscr(value);
|
||||
}
|
||||
|
||||
void Jit::ClearExclusiveState() {
|
||||
impl->ClearExclusiveState();
|
||||
}
|
||||
|
||||
void Jit::DumpDisassembly() const {
|
||||
impl->DumpDisassembly();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
76
src/dynarmic/src/dynarmic/backend/riscv64/a32_jitstate.cpp
Normal file
76
src/dynarmic/src/dynarmic/backend/riscv64/a32_jitstate.cpp
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
|
||||
#include <mcl/bit/bit_field.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
// Reassembles the architectural CPSR value from the split fields of the JIT state.
u32 A32JitState::Cpsr() const {
    // NZCV, Q and the remaining flags are OR-ed in exactly as stored.
    u32 value = cpsr_nzcv | cpsr_q | cpsr_jaifm;

    // GE flags: the top bit of each byte of cpsr_ge maps to CPSR bits 19..16.
    if (mcl::bit::get_bit<31>(cpsr_ge)) {
        value |= 1u << 19;
    }
    if (mcl::bit::get_bit<23>(cpsr_ge)) {
        value |= 1u << 18;
    }
    if (mcl::bit::get_bit<15>(cpsr_ge)) {
        value |= 1u << 17;
    }
    if (mcl::bit::get_bit<7>(cpsr_ge)) {
        value |= 1u << 16;
    }

    // E flag (descriptor bit 1 -> CPSR bit 9) and T flag (descriptor bit 0 -> CPSR bit 5).
    if (mcl::bit::get_bit<1>(upper_location_descriptor)) {
        value |= 1u << 9;
    }
    if (mcl::bit::get_bit<0>(upper_location_descriptor)) {
        value |= 1u << 5;
    }

    // IT state: descriptor bits 15..10 stay in place, bits 9..8 move up to CPSR bits 26..25.
    value |= upper_location_descriptor & 0b11111100'00000000;
    value |= (upper_location_descriptor & 0b00000011'00000000) << 17;

    return value;
}
|
||||
|
||||
// Splits an architectural CPSR value into the separate fields of the JIT state.
void A32JitState::SetCpsr(u32 cpsr) {
    // Fields that keep their CPSR bit positions.
    cpsr_nzcv = cpsr & 0xF0000000;
    cpsr_q = cpsr & (1 << 27);
    cpsr_jaifm = cpsr & 0x010001DF;

    // GE flags: each of CPSR bits 19..16 expands to one full byte of cpsr_ge.
    u32 ge_bytes = 0;
    if (mcl::bit::get_bit<19>(cpsr)) {
        ge_bytes |= 0xFF000000;
    }
    if (mcl::bit::get_bit<18>(cpsr)) {
        ge_bytes |= 0x00FF0000;
    }
    if (mcl::bit::get_bit<17>(cpsr)) {
        ge_bytes |= 0x0000FF00;
    }
    if (mcl::bit::get_bit<16>(cpsr)) {
        ge_bytes |= 0x000000FF;
    }
    cpsr_ge = ge_bytes;

    // Rebuild the low half of the descriptor; the upper 16 bits are preserved.
    u32 descriptor = upper_location_descriptor & 0xFFFF0000;
    // E flag, T flag
    if (mcl::bit::get_bit<9>(cpsr)) {
        descriptor |= 2;
    }
    if (mcl::bit::get_bit<5>(cpsr)) {
        descriptor |= 1;
    }
    // IT state
    descriptor |= cpsr & 0b11111100'00000000;
    descriptor |= (cpsr >> 17) & 0b00000011'00000000;
    upper_location_descriptor = descriptor;
}
|
||||
|
||||
constexpr u32 FPCR_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
|
||||
constexpr u32 FPSR_MASK = 0xF800009F;
|
||||
|
||||
// Combines the upper 16 bits of the location descriptor with the stored fpsr bits.
u32 A32JitState::Fpscr() const {
    const u32 descriptor_bits = upper_location_descriptor & 0xffff0000;
    return descriptor_bits | fpsr;
}
|
||||
|
||||
// Stores the FPSR_MASK bits in fpsr and the FPCR_MASK bits in the location descriptor, keeping
// the descriptor's lower 16 bits.
void A32JitState::SetFpscr(u32 fpscr) {
    const u32 kept_descriptor_bits = upper_location_descriptor & 0x0000ffff;
    upper_location_descriptor = kept_descriptor_bits | (fpscr & FPCR_MASK);
    fpsr = fpscr & FPSR_MASK;
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
48
src/dynarmic/src/dynarmic/backend/riscv64/a32_jitstate.h
Normal file
48
src/dynarmic/src/dynarmic/backend/riscv64/a32_jitstate.h
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
struct A32JitState {
|
||||
u32 cpsr_nzcv = 0;
|
||||
u32 cpsr_q = 0;
|
||||
u32 cpsr_jaifm = 0;
|
||||
u32 cpsr_ge = 0;
|
||||
|
||||
u32 fpsr = 0;
|
||||
u32 fpsr_nzcv = 0;
|
||||
|
||||
std::array<u32, 16> regs{};
|
||||
|
||||
u32 upper_location_descriptor;
|
||||
|
||||
alignas(16) std::array<u32, 64> ext_regs{};
|
||||
|
||||
u32 exclusive_state = 0;
|
||||
|
||||
u32 Cpsr() const;
|
||||
void SetCpsr(u32 cpsr);
|
||||
|
||||
u32 Fpscr() const;
|
||||
void SetFpscr(u32 fpscr);
|
||||
|
||||
IR::LocationDescriptor GetLocationDescriptor() const {
|
||||
return IR::LocationDescriptor{regs[15] | (static_cast<u64>(upper_location_descriptor) << 32)};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
20
src/dynarmic/src/dynarmic/backend/riscv64/abi.h
Normal file
20
src/dynarmic/src/dynarmic/backend/riscv64/abi.h
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <biscuit/registers.hpp>
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
constexpr biscuit::GPR Xstate{27};
|
||||
constexpr biscuit::GPR Xhalt{26};
|
||||
|
||||
constexpr biscuit::GPR Xscratch0{30}, Xscratch1{31};
|
||||
|
||||
constexpr std::initializer_list<u32> GPR_ORDER{8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 5, 6, 7, 28, 29, 10, 11, 12, 13, 14, 15, 16, 17};
|
||||
constexpr std::initializer_list<u32> FPR_ORDER{8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
39
src/dynarmic/src/dynarmic/backend/riscv64/code_block.h
Normal file
39
src/dynarmic/src/dynarmic/backend/riscv64/code_block.h
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <new>
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
class CodeBlock {
|
||||
public:
|
||||
explicit CodeBlock(std::size_t size) noexcept : memsize(size) {
|
||||
mem = (u8*)mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||
if (mem == nullptr)
|
||||
ASSERT_FALSE("out of memory");
|
||||
}
|
||||
|
||||
~CodeBlock() noexcept {
|
||||
if (mem == nullptr)
|
||||
return;
|
||||
munmap(mem, memsize);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T ptr() const noexcept {
|
||||
static_assert(std::is_pointer_v<T> || std::is_same_v<T, uptr> || std::is_same_v<T, sptr>);
|
||||
return reinterpret_cast<T>(mem);
|
||||
}
|
||||
|
||||
protected:
|
||||
u8* mem = nullptr;
|
||||
size_t memsize = 0;
|
||||
};
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
26
src/dynarmic/src/dynarmic/backend/riscv64/emit_context.h
Normal file
26
src/dynarmic/src/dynarmic/backend/riscv64/emit_context.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
} // namespace Dynarmic::IR
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
struct EmitConfig;
|
||||
|
||||
struct EmitContext {
|
||||
IR::Block& block;
|
||||
RegAlloc& reg_alloc;
|
||||
const EmitConfig& emit_conf;
|
||||
EmittedBlockInfo& ebi;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
174
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64.cpp
Normal file
174
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64.cpp
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
|
||||
#include <bit>
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
#include <mcl/bit/bit_field.hpp>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Void>(biscuit::Assembler&, EmitContext&, IR::Inst*) {}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Identity>(biscuit::Assembler&, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Breakpoint>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CallHostFunction>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PushRSB>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetCarryFromOp>(biscuit::Assembler&, EmitContext& ctx, IR::Inst* inst) {
|
||||
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(ctx.reg_alloc.IsValueLive(inst));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetOverflowFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetGEFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetNZCVFromOp>(biscuit::Assembler&, EmitContext& ctx, IR::Inst* inst) {
|
||||
[[maybe_unused]] auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(ctx.reg_alloc.IsValueLive(inst));
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetNZFromOp>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto Xnz = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xvalue, Xnz);
|
||||
|
||||
as.SEQZ(Xnz, Xvalue);
|
||||
as.SLLI(Xnz, Xnz, 30);
|
||||
as.SLTZ(Xscratch0, Xvalue);
|
||||
as.SLLI(Xscratch0, Xscratch0, 31);
|
||||
as.OR(Xnz, Xnz, Xscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetUpperFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetLowerFromOp>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::GetCFlagFromNZCV>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Xc = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xnzcv = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xc, Xnzcv);
|
||||
|
||||
as.LUI(Xscratch0, 0x20000);
|
||||
as.AND(Xc, Xnzcv, Xscratch0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::NZCVFromPackedFlags>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// Emits host code for one IR block at the assembler's current cursor and
// returns where that code starts, how many bytes it occupies and which
// relocations were recorded while emitting it.
EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitConfig& emit_conf) {
    using namespace biscuit;

    EmittedBlockInfo ebi;
    RegAlloc reg_alloc{as, GPR_ORDER, FPR_ORDER};
    EmitContext ctx{block, reg_alloc, emit_conf, ebi};

    // Everything emitted from this point on belongs to the block.
    ebi.entry_point = reinterpret_cast<CodePtr>(as.GetCursorPointer());

    // Route every IR instruction to its EmitIR<opcode> specialisation. The case
    // labels are generated from opcodes.inc through the three macros below.
    for (auto it = block.begin(); it != block.end(); ++it) {
        IR::Inst* const inst = &*it;

        switch (inst->GetOpcode()) {
#define OPCODE(name, type, ...)                  \
    case IR::Opcode::name:                       \
        EmitIR<IR::Opcode::name>(as, ctx, inst); \
        break;
#define A32OPC(name, type, ...)                       \
    case IR::Opcode::A32##name:                       \
        EmitIR<IR::Opcode::A32##name>(as, ctx, inst); \
        break;
#define A64OPC(name, type, ...)                       \
    case IR::Opcode::A64##name:                       \
        EmitIR<IR::Opcode::A64##name>(as, ctx, inst); \
        break;
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
        default:
            ASSERT_FALSE("Invalid opcode: {}", inst->GetOpcode());
            break;
        }
    }

    reg_alloc.UpdateAllUses();
    reg_alloc.AssertNoMoreUses();

    if (emit_conf.enable_cycle_counting) {
        // Subtract this block's cycle count from StackLayout::cycles_remaining.
        const size_t cycles_to_add = block.CycleCount();
        // Unsigned (modular) negation: the bit pattern is the two's-complement of the count.
        const size_t negated_cycles = -cycles_to_add;
        // True when the negated count survives a round trip through a 12-bit signed field,
        // i.e. it can be used directly as an ADDI immediate.
        const bool fits_addi_immediate = mcl::bit::sign_extend<12>(negated_cycles) == negated_cycles;

        as.LD(Xscratch0, offsetof(StackLayout, cycles_remaining), sp);
        if (fits_addi_immediate) {
            as.ADDI(Xscratch0, Xscratch0, negated_cycles);
        } else {
            as.LI(Xscratch1, cycles_to_add);
            as.SUB(Xscratch0, Xscratch0, Xscratch1);
        }
        as.SD(Xscratch0, offsetof(StackLayout, cycles_remaining), sp);
    }

    EmitA32Terminal(as, ctx);

    ebi.size = reinterpret_cast<CodePtr>(as.GetCursorPointer()) - ebi.entry_point;
    return ebi;
}
|
||||
|
||||
// Records a relocation for `link_target` at the current position (expressed as
// a byte offset from the block's entry point) and emits a single NOP there.
// NOTE(review): the NOP is presumably a placeholder patched when the block is linked - confirm.
void EmitRelocation(biscuit::Assembler& as, EmitContext& ctx, LinkTarget link_target) {
    const auto here = reinterpret_cast<CodePtr>(as.GetCursorPointer());
    const std::ptrdiff_t offset_in_block = here - ctx.ebi.entry_point;

    ctx.ebi.relocations.emplace_back(Relocation{offset_in_block, link_target});
    as.NOP();
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
61
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64.h
Normal file
61
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64.h
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <biscuit/label.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
namespace biscuit {
|
||||
class Assembler;
|
||||
} // namespace biscuit
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
class Inst;
|
||||
enum class Cond;
|
||||
enum class Opcode;
|
||||
} // namespace Dynarmic::IR
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
using CodePtr = std::byte*;
|
||||
|
||||
enum class LinkTarget {
|
||||
ReturnFromRunCode,
|
||||
};
|
||||
|
||||
struct Relocation {
|
||||
std::ptrdiff_t code_offset;
|
||||
LinkTarget target;
|
||||
};
|
||||
|
||||
struct EmittedBlockInfo {
|
||||
CodePtr entry_point;
|
||||
size_t size;
|
||||
std::vector<Relocation> relocations;
|
||||
};
|
||||
|
||||
struct EmitConfig {
|
||||
bool enable_cycle_counting;
|
||||
bool always_little_endian;
|
||||
};
|
||||
|
||||
struct EmitContext;
|
||||
|
||||
EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitConfig& emit_conf);
|
||||
|
||||
template<IR::Opcode op>
|
||||
void EmitIR(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst);
|
||||
void EmitRelocation(biscuit::Assembler& as, EmitContext& ctx, LinkTarget link_target);
|
||||
void EmitA32Cond(biscuit::Assembler& as, EmitContext& ctx, IR::Cond cond, biscuit::Label* label);
|
||||
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx);
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
401
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp
Normal file
401
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64_a32.cpp
Normal file
|
|
@ -0,0 +1,401 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
// Emits a test of the guest condition `cond` against A32JitState::cpsr_nzcv
// and a branch to `label` that is taken when the condition holds.
// Clobbers Xscratch0 and, for some conditions, Xscratch1.
void EmitA32Cond(biscuit::Assembler& as, EmitContext&, IR::Cond cond, biscuit::Label* label) {
    // Flag positions after cpsr_nzcv has been shifted right by 28.
    constexpr int flag_n = 0b1000;
    constexpr int flag_z = 0b0100;
    constexpr int flag_c = 0b0010;
    constexpr int flag_v = 0b0001;

    // Keep only the flags in `mask` in Xscratch0.
    const auto select_flags = [&as](int mask) {
        as.ANDI(Xscratch0, Xscratch0, mask);
    };
    // Branch to `label` when the selected flags equal `pattern` exactly.
    const auto branch_if_flags_are = [&as, label](int pattern) {
        as.ADDI(Xscratch1, biscuit::zero, pattern);
        as.BEQ(Xscratch0, Xscratch1, label);
    };

    as.LWU(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
    as.SRLIW(Xscratch0, Xscratch0, 28);

    switch (cond) {
    case IR::Cond::EQ:  // Z == 1
        select_flags(flag_z);
        as.BNEZ(Xscratch0, label);
        break;
    case IR::Cond::NE:  // Z == 0
        select_flags(flag_z);
        as.BEQZ(Xscratch0, label);
        break;
    case IR::Cond::CS:  // C == 1
        select_flags(flag_c);
        as.BNEZ(Xscratch0, label);
        break;
    case IR::Cond::CC:  // C == 0
        select_flags(flag_c);
        as.BEQZ(Xscratch0, label);
        break;
    case IR::Cond::MI:  // N == 1
        select_flags(flag_n);
        as.BNEZ(Xscratch0, label);
        break;
    case IR::Cond::PL:  // N == 0
        select_flags(flag_n);
        as.BEQZ(Xscratch0, label);
        break;
    case IR::Cond::VS:  // V == 1
        select_flags(flag_v);
        as.BNEZ(Xscratch0, label);
        break;
    case IR::Cond::VC:  // V == 0
        select_flags(flag_v);
        as.BEQZ(Xscratch0, label);
        break;
    case IR::Cond::HI:  // Z == 0 && C == 1
        select_flags(flag_z | flag_c);
        branch_if_flags_are(flag_c);
        break;
    case IR::Cond::LS:  // Z == 1 || C == 0
        select_flags(flag_z | flag_c);
        as.ADDI(Xscratch1, biscuit::zero, flag_c);
        as.BNE(Xscratch0, Xscratch1, label);
        break;
    case IR::Cond::GE:  // N == V: both set, or both clear
        select_flags(flag_n | flag_v);
        branch_if_flags_are(flag_n | flag_v);
        as.BEQZ(Xscratch0, label);
        break;
    case IR::Cond::LT:  // N != V: exactly one of them set
        select_flags(flag_n | flag_v);
        branch_if_flags_are(flag_n);
        branch_if_flags_are(flag_v);
        break;
    case IR::Cond::GT:  // Z == 0 && N == V
        select_flags(flag_n | flag_z | flag_v);
        branch_if_flags_are(flag_n | flag_v);
        as.BEQZ(Xscratch0, label);
        break;
    case IR::Cond::LE:  // Z == 1 || N != V
        select_flags(flag_n | flag_z | flag_v);
        // Truth table indexed by the selected flags: bit i of the constant is 1
        // exactly when flag combination i satisfies LE (i = 1, 4, 5, 8, 12, 13).
        as.LI(Xscratch1, 0b11000100110010);
        as.SRLW(Xscratch0, Xscratch1, Xscratch0);
        as.ANDI(Xscratch0, Xscratch0, 1);
        as.BNEZ(Xscratch0, label);
        break;
    default:
        ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
        break;
    }
}
|
||||
|
||||
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
|
||||
|
||||
void EmitA32Terminal(biscuit::Assembler&, EmitContext&, IR::Term::Interpret, IR::LocationDescriptor, bool) {
|
||||
ASSERT_FALSE("Interpret should never be emitted.");
|
||||
}
|
||||
|
||||
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
|
||||
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
|
||||
}
|
||||
|
||||
// Stores the upper 32 bits of `new_location`'s hash into
// A32JitState::upper_location_descriptor, but only when they differ from the
// upper bits of `old_location`. Single-stepping is cleared before hashing, and
// bit 1 of the new value is cleared when always_little_endian is configured.
void EmitSetUpperLocationDescriptor(biscuit::Assembler& as, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
    const auto upper_half = [](const IR::LocationDescriptor& desc) -> u32 {
        const auto hash = A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash();
        return static_cast<u32>(hash >> 32);
    };

    const u32 old_upper = upper_half(old_location);

    u32 new_upper = upper_half(new_location);
    if (ctx.emit_conf.always_little_endian) {
        new_upper &= ~u32(0x2);
    }

    if (old_upper == new_upper) {
        return;  // nothing to update
    }

    as.LI(Xscratch0, new_upper);
    as.SW(Xscratch0, offsetof(A32JitState, upper_location_descriptor), Xstate);
}
|
||||
|
||||
// LinkBlock terminal: updates the upper location descriptor if needed, writes
// the next location's value into guest register 15 and returns to the dispatcher.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool) {
    // Byte offset of regs[15] inside A32JitState.
    constexpr auto pc_slot = offsetof(A32JitState, regs) + sizeof(u32) * 15;

    EmitSetUpperLocationDescriptor(as, ctx, terminal.next, initial_location);

    // SW stores only the low 32 bits of the loaded value.
    as.LI(Xscratch0, terminal.next.Value());
    as.SW(Xscratch0, pc_slot, Xstate);
    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);

    // TODO: Implement LinkBlock optimization
}
|
||||
|
||||
// LinkBlockFast terminal: currently handled exactly like LinkBlock - update the
// upper location descriptor if needed, write the next location's value into
// guest register 15 and return to the dispatcher.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool) {
    // Byte offset of regs[15] inside A32JitState.
    constexpr auto pc_slot = offsetof(A32JitState, regs) + sizeof(u32) * 15;

    EmitSetUpperLocationDescriptor(as, ctx, terminal.next, initial_location);

    // SW stores only the low 32 bits of the loaded value.
    as.LI(Xscratch0, terminal.next.Value());
    as.SW(Xscratch0, pc_slot, Xstate);
    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);

    // TODO: Implement LinkBlockFast optimization
}
|
||||
|
||||
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {
|
||||
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
|
||||
|
||||
// TODO: Implement PopRSBHint optimization
|
||||
}
|
||||
|
||||
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
|
||||
EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
|
||||
|
||||
// TODO: Implement FastDispatchHint optimization
|
||||
}
|
||||
|
||||
// If terminal. Emitted layout:
//     <branch to condition_holds when terminal.if_ is true>
//     <else_ terminal>
//   condition_holds:
//     <then_ terminal>
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    biscuit::Label condition_holds;

    EmitA32Cond(as, ctx, terminal.if_, &condition_holds);

    // Fall-through path: condition was false.
    EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);

    as.Bind(&condition_holds);
    EmitA32Terminal(as, ctx, terminal.then_, initial_location, is_single_step);
}
|
||||
|
||||
// CheckBit terminal. Emitted layout:
//     <load StackLayout::check_bit; branch to fail when it is zero>
//     <then_ terminal>
//   fail:
//     <else_ terminal>
//
// check_bit is a StackLayout field, so it is addressed relative to the stack
// pointer - the same base EmitRV64 uses for StackLayout::cycles_remaining.
// Xstate is the base for A32JitState fields and must not be combined with a
// StackLayout offset.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    biscuit::Label fail;

    as.LBU(Xscratch0, offsetof(StackLayout, check_bit), biscuit::sp);
    as.BEQZ(Xscratch0, &fail);

    // check_bit != 0
    EmitA32Terminal(as, ctx, terminal.then_, initial_location, is_single_step);

    // check_bit == 0
    as.Bind(&fail);
    EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);
}
|
||||
|
||||
// CheckHalt terminal. Emitted layout:
//     <load 32-bit word at Xhalt; fence; branch to halt_requested when non-zero>
//     <else_ terminal>
//   halt_requested:
//     <return to dispatcher>
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    biscuit::Label halt_requested;

    as.LWU(Xscratch0, 0, Xhalt);
    as.FENCE(biscuit::FenceOrder::RW, biscuit::FenceOrder::RW);
    as.BNEZ(Xscratch0, &halt_requested);

    // No halt pending: continue with the wrapped terminal.
    EmitA32Terminal(as, ctx, terminal.else_, initial_location, is_single_step);

    as.Bind(&halt_requested);
    EmitRelocation(as, ctx, LinkTarget::ReturnFromRunCode);
}
|
||||
|
||||
// Variant dispatcher: forwards to the EmitA32Terminal overload that matches
// the alternative currently held by `terminal`.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    const auto emit_alternative = [&](const auto& concrete_terminal) {
        EmitA32Terminal(as, ctx, concrete_terminal, initial_location, is_single_step);
    };
    boost::apply_visitor(emit_alternative, terminal);
}
|
||||
|
||||
// Entry point: emits the terminal of the block held by `ctx`. The block's
// location is passed on with single-stepping cleared; the original
// single-stepping state is passed separately as a flag.
void EmitA32Terminal(biscuit::Assembler& as, EmitContext& ctx) {
    const A32::LocationDescriptor location{ctx.block.Location()};

    const bool is_single_step = location.SingleStepping();
    const auto initial_location = location.SetSingleStepping(false);

    EmitA32Terminal(as, ctx, ctx.block.GetTerminal(), initial_location, is_single_step);
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCheckBit>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetRegister>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
|
||||
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
RegAlloc::Realize(Xresult);
|
||||
|
||||
as.LWU(Xresult, offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(reg), Xstate);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetExtendedRegister32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetExtendedRegister64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetVector>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetRegister>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Xvalue = ctx.reg_alloc.ReadX(args[1]);
|
||||
RegAlloc::Realize(Xvalue);
|
||||
|
||||
// TODO: Detect if Gpr vs Fpr is more appropriate
|
||||
|
||||
as.SW(Xvalue, offsetof(A32JitState, regs) + sizeof(u32) * static_cast<size_t>(reg), Xstate);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetExtendedRegister32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetExtendedRegister64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetVector>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetCpsr>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsr>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZCV>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Xnzcv = ctx.reg_alloc.ReadX(args[0]);
|
||||
RegAlloc::Realize(Xnzcv);
|
||||
|
||||
as.SW(Xnzcv, offsetof(A32JitState, cpsr_nzcv), Xstate);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZCVRaw>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZCVQ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetCpsrNZC>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
// TODO: Add full implementation
|
||||
ASSERT(!args[0].IsImmediate() && !args[1].IsImmediate());
|
||||
|
||||
auto Xnz = ctx.reg_alloc.ReadX(args[0]);
|
||||
auto Xc = ctx.reg_alloc.ReadX(args[1]);
|
||||
RegAlloc::Realize(Xnz, Xc);
|
||||
|
||||
as.LWU(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
|
||||
as.LUI(Xscratch1, 0x10000);
|
||||
as.AND(Xscratch0, Xscratch0, Xscratch1);
|
||||
as.OR(Xscratch0, Xscratch0, Xnz);
|
||||
as.OR(Xscratch0, Xscratch0, Xc);
|
||||
as.SW(Xscratch0, offsetof(A32JitState, cpsr_nzcv), Xstate);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetCFlag>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32OrQFlag>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetGEFlags>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetGEFlags>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetGEFlagsCompressed>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32BXWritePC>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32UpdateUpperLocationDescriptor>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CallSupervisor>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExceptionRaised>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32DataSynchronizationBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32DataMemoryBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32InstructionSynchronizationBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetFpscr>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetFpscr>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32GetFpscrNZCV>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32SetFpscrNZCV>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocInternalOperation>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocSendOneWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocSendTwoWords>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocGetOneWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocGetTwoWords>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocLoadWords>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32CoprocStoreWords>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ClearExclusive>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ReadMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ReadMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ReadMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ReadMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveReadMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveReadMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveReadMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveReadMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32WriteMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32WriteMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32WriteMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32WriteMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A32ExclusiveWriteMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
200
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64_a64.cpp
Normal file
200
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64_a64.cpp
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetCheckBit>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCFlag>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetNZCVRaw>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetNZCVRaw>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetNZCV>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetW>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetX>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetS>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetD>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetQ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetSP>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetFPCR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetFPSR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetW>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetX>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetS>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetD>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetQ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetSP>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetFPCR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetFPSR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetPC>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64CallSupervisor>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExceptionRaised>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataCacheOperationRaised>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64InstructionCacheOperationRaised>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataSynchronizationBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64DataMemoryBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64InstructionSynchronizationBarrier>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCNTFRQ>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCNTPCT>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetCTR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetDCZID>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetTPIDR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64GetTPIDRRO>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64SetTPIDR>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ClearExclusive>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ReadMemory128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveReadMemory128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64WriteMemory128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::A64ExclusiveWriteMemory128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32Castagnoli8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32Castagnoli16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32Castagnoli32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32Castagnoli64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32ISO8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32ISO16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32ISO32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CRC32ISO64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AESDecryptSingleRound>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AESEncryptSingleRound>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AESInverseMixColumns>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AESMixColumns>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SM4AccessSubstitutionBox>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SHA256Hash>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SHA256MessageSchedule0>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SHA256MessageSchedule1>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,572 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Pack2x32To1x64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Pack2x64To1x128>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LeastSignificantWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LeastSignificantHalf>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LeastSignificantByte>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MostSignificantWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MostSignificantBit>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::IsZero32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::IsZero64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::TestBit>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ConditionalSelect32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ConditionalSelect64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ConditionalSelectNZCV>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftLeft32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto& operand_arg = args[0];
|
||||
auto& shift_arg = args[1];
|
||||
auto& carry_arg = args[2];
|
||||
|
||||
// TODO: Add full implementation
|
||||
ASSERT(carry_inst != nullptr);
|
||||
ASSERT(shift_arg.IsImmediate());
|
||||
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xcarry_out = ctx.reg_alloc.WriteX(carry_inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
|
||||
auto Xcarry_in = ctx.reg_alloc.ReadX(carry_arg);
|
||||
RegAlloc::Realize(Xresult, Xcarry_out, Xoperand, Xcarry_in);
|
||||
|
||||
const u8 shift = shift_arg.GetImmediateU8();
|
||||
|
||||
if (shift == 0) {
|
||||
as.ADDW(Xresult, Xoperand, biscuit::zero);
|
||||
as.ADDW(Xcarry_out, Xcarry_in, biscuit::zero);
|
||||
} else if (shift < 32) {
|
||||
as.SRLIW(Xcarry_out, Xoperand, 32 - shift);
|
||||
as.ANDI(Xcarry_out, Xcarry_out, 1);
|
||||
as.SLLIW(Xresult, Xoperand, shift);
|
||||
} else if (shift > 32) {
|
||||
as.MV(Xresult, biscuit::zero);
|
||||
as.MV(Xcarry_out, biscuit::zero);
|
||||
} else {
|
||||
as.ANDI(Xcarry_out, Xresult, 1);
|
||||
as.MV(Xresult, biscuit::zero);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftLeft64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftRight32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto& operand_arg = args[0];
|
||||
auto& shift_arg = args[1];
|
||||
|
||||
// TODO: Add full implementation
|
||||
ASSERT(carry_inst == nullptr);
|
||||
ASSERT(shift_arg.IsImmediate());
|
||||
|
||||
const u8 shift = shift_arg.GetImmediateU8();
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xoperand = ctx.reg_alloc.ReadX(operand_arg);
|
||||
RegAlloc::Realize(Xresult, Xoperand);
|
||||
|
||||
if (shift <= 31) {
|
||||
as.SRLIW(Xresult, Xoperand, shift);
|
||||
} else {
|
||||
as.MV(Xresult, biscuit::zero);
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ArithmeticShiftRight32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ArithmeticShiftRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRight32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRightExtended>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftLeftMasked32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftLeftMasked64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftRightMasked32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::LogicalShiftRightMasked64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ArithmeticShiftRightMasked32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ArithmeticShiftRightMasked64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRightMasked32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRightMasked64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<size_t bitsize>
|
||||
static void AddImmWithFlags(biscuit::Assembler& as, biscuit::GPR rd, biscuit::GPR rs, u64 imm, biscuit::GPR flags) {
|
||||
static_assert(bitsize == 32 || bitsize == 64);
|
||||
if constexpr (bitsize == 32) {
|
||||
imm = static_cast<u32>(imm);
|
||||
}
|
||||
if (mcl::bit::sign_extend<12>(imm) == imm) {
|
||||
bitsize == 32 ? as.ADDIW(rd, rs, imm) : as.ADDI(rd, rs, imm);
|
||||
} else {
|
||||
as.LI(Xscratch0, imm);
|
||||
bitsize == 32 ? as.ADDW(rd, rs, Xscratch0) : as.ADD(rd, rs, Xscratch0);
|
||||
}
|
||||
|
||||
// N
|
||||
as.SEQZ(flags, rd);
|
||||
as.SLLI(flags, flags, 30);
|
||||
// Z
|
||||
as.SLTZ(Xscratch1, rd);
|
||||
as.SLLI(Xscratch1, Xscratch1, 31);
|
||||
as.OR(flags, flags, Xscratch1);
|
||||
|
||||
if constexpr (bitsize == 32) {
|
||||
// C
|
||||
if (mcl::bit::sign_extend<12>(imm) == imm) {
|
||||
as.ADDI(Xscratch1, rs, imm);
|
||||
} else {
|
||||
as.ADD(Xscratch1, rs, Xscratch0);
|
||||
}
|
||||
as.SRLI(Xscratch1, Xscratch1, 3);
|
||||
as.LUI(Xscratch0, 0x20000);
|
||||
as.AND(Xscratch1, Xscratch1, Xscratch0);
|
||||
as.OR(flags, flags, Xscratch1);
|
||||
// V
|
||||
as.LI(Xscratch0, imm);
|
||||
as.ADD(Xscratch1, rs, Xscratch0);
|
||||
as.XOR(Xscratch0, Xscratch0, rs);
|
||||
as.NOT(Xscratch0, Xscratch0);
|
||||
as.XOR(Xscratch1, Xscratch1, rs);
|
||||
as.AND(Xscratch1, Xscratch0, Xscratch1);
|
||||
as.SRLIW(Xscratch1, Xscratch1, 31);
|
||||
as.SLLI(Xscratch1, Xscratch1, 28);
|
||||
as.OR(flags, flags, Xscratch1);
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t bitsize, bool sub>
|
||||
static void EmitAddSub(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto nzcv_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetNZCVFromOp);
|
||||
const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
auto Xresult = ctx.reg_alloc.WriteX(inst);
|
||||
auto Xa = ctx.reg_alloc.ReadX(args[0]);
|
||||
|
||||
if (overflow_inst) {
|
||||
UNIMPLEMENTED();
|
||||
} else if (nzcv_inst) {
|
||||
if (args[1].IsImmediate()) {
|
||||
const u64 imm = args[1].GetImmediateU64();
|
||||
|
||||
if (args[2].IsImmediate()) {
|
||||
auto Xflags = ctx.reg_alloc.WriteX(nzcv_inst);
|
||||
RegAlloc::Realize(Xresult, Xflags, Xa);
|
||||
|
||||
if (args[2].GetImmediateU1()) {
|
||||
AddImmWithFlags<bitsize>(as, *Xresult, *Xa, sub ? ~imm : imm + 1, *Xflags);
|
||||
} else {
|
||||
AddImmWithFlags<bitsize>(as, *Xresult, *Xa, sub ? -imm : imm, *Xflags);
|
||||
}
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
} else {
|
||||
if (args[1].IsImmediate()) {
|
||||
const u64 imm = args[1].GetImmediateU64();
|
||||
|
||||
if (args[2].IsImmediate()) {
|
||||
UNIMPLEMENTED();
|
||||
} else {
|
||||
auto Xnzcv = ctx.reg_alloc.ReadX(args[2]);
|
||||
RegAlloc::Realize(Xresult, Xa, Xnzcv);
|
||||
|
||||
as.LUI(Xscratch0, 0x20000);
|
||||
as.AND(Xscratch0, Xnzcv, Xscratch0);
|
||||
as.SRLI(Xscratch0, Xscratch0, 29);
|
||||
as.LI(Xscratch1, imm);
|
||||
as.ADD(Xscratch0, Xscratch0, Xscratch1);
|
||||
as.ADDW(Xresult, Xa, Xscratch0);
|
||||
}
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Add32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitAddSub<32, false>(as, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Add64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Sub32>(biscuit::Assembler& as, EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitAddSub<32, true>(as, ctx, inst);
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Sub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Mul32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Mul64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedMultiplyHigh64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedMultiplyHigh64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedDiv32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedDiv64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedDiv32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedDiv64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::And32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::And64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AndNot32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::AndNot64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Eor32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Eor64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Or32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Or64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Not32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::Not64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignExtendByteToWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignExtendHalfToWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignExtendByteToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignExtendHalfToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignExtendWordToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ZeroExtendByteToWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ZeroExtendHalfToWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ZeroExtendByteToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ZeroExtendHalfToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ZeroExtendWordToLong>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ZeroExtendLongToQuad>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ByteReverseWord>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ByteReverseHalf>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ByteReverseDual>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CountLeadingZeros32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::CountLeadingZeros64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ExtractRegister32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ExtractRegister64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ReplicateBit32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::ReplicateBit64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MaxSigned32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MaxSigned64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MaxUnsigned32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MaxUnsigned64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MinSigned32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MinSigned64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MinUnsigned32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::MinUnsigned64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,460 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAbs16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAbs32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAbs64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPCompare32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPCompare64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDiv32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDiv64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMax32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMax64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMaxNumeric32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMaxNumeric64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMin32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMin64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMinNumeric32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMinNumeric64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMul32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMul64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulX32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPMulX64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPNeg16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPNeg32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPNeg64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipEstimate16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipEstimate32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipEstimate64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipExponent16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipExponent32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipExponent64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipStepFused16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipStepFused32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRecipStepFused64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRoundInt16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRoundInt32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRoundInt64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtEstimate16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtEstimate32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtEstimate64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtStepFused16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtStepFused32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPRSqrtStepFused64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSqrt32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSqrt64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToHalf>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToHalf>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedS32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedS64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedU32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPDoubleToFixedU64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedS32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedS64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedU32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPHalfToFixedU64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedS32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedS64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedU32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPSingleToFixedU64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU16ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS16ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU16ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS16ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU32ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS32ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU32ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS32ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU64ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedU64ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS64ToDouble>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPFixedS64ToSingle>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAddSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSubAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingAddSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedHalvingSubAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedAddU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedAddS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedSubU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedSubS8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedAddU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedAddS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedSubU16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSaturatedSubS16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedAbsDiffSumU8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::PackedSelect>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAddWithFlag32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSubWithFlag32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturation>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturation>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAdd8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedDoublingMultiplyReturnHigh32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSub8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::SignedSaturatedSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedAdd8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedSub8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::UnsignedSaturatedSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
1395
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64_vector.cpp
Normal file
1395
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64_vector.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,355 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAbs16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAbs32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAbs64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorDiv32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorDiv64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorEqual16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorEqual32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorEqual64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromHalf32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromSignedFixed32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromSignedFixed64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromUnsignedFixed32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorFromUnsignedFixed64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorGreater32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorGreater64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorGreaterEqual32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorGreaterEqual64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMax32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMax64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMaxNumeric32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMaxNumeric64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMin32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMin64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMinNumeric32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMinNumeric64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMul32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMul64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulX32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorMulX64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorNeg16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorNeg32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorNeg64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorPairedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorPairedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorPairedAddLower32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorPairedAddLower64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipEstimate16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipEstimate32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipEstimate64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipStepFused16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipStepFused32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRecipStepFused64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRoundInt16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRoundInt32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRoundInt64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtEstimate64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorRSqrtStepFused64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorSqrt32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorSqrt64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToHalf32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToSignedFixed16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToSignedFixed32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToSignedFixed64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::FPVectorToUnsignedFixed64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/riscv64/abi.h"
|
||||
#include "dynarmic/backend/riscv64/emit_context.h"
|
||||
#include "dynarmic/backend/riscv64/emit_riscv64.h"
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedSub8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorSignedSaturatedSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedAdd64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub8>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub16>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::VectorUnsignedSaturatedSub64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
352
src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.cpp
Normal file
352
src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.cpp
Normal file
|
|
@ -0,0 +1,352 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/riscv64/reg_alloc.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/mp/metavalue/lift_value.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/common/always_false.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
constexpr size_t spill_offset = offsetof(StackLayout, spill);
|
||||
constexpr size_t spill_slot_size = sizeof(decltype(StackLayout::spill)::value_type);
|
||||
|
||||
// Reports whether values of `type` never occupy a host location.
// Only IR::Type::Table is treated as valueless.
static bool IsValuelessType(IR::Type type) {
    return type == IR::Type::Table;
}
|
||||
|
||||
IR::Type Argument::GetType() const {
|
||||
return value.GetType();
|
||||
}
|
||||
|
||||
bool Argument::IsImmediate() const {
|
||||
return value.IsImmediate();
|
||||
}
|
||||
|
||||
bool Argument::GetImmediateU1() const {
|
||||
return value.GetU1();
|
||||
}
|
||||
|
||||
u8 Argument::GetImmediateU8() const {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x100);
|
||||
return u8(imm);
|
||||
}
|
||||
|
||||
u16 Argument::GetImmediateU16() const {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x10000);
|
||||
return u16(imm);
|
||||
}
|
||||
|
||||
u32 Argument::GetImmediateU32() const {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x100000000);
|
||||
return u32(imm);
|
||||
}
|
||||
|
||||
u64 Argument::GetImmediateU64() const {
|
||||
return value.GetImmediateAsU64();
|
||||
}
|
||||
|
||||
IR::Cond Argument::GetImmediateCond() const {
|
||||
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
|
||||
return value.GetCond();
|
||||
}
|
||||
|
||||
IR::AccType Argument::GetImmediateAccType() const {
|
||||
ASSERT(IsImmediate() && GetType() == IR::Type::AccType);
|
||||
return value.GetAccType();
|
||||
}
|
||||
|
||||
// Returns true when `value` is one of the IR instructions recorded in this location.
bool HostLocInfo::Contains(const IR::Inst* value) const {
    for (const IR::Inst* held : values) {
        if (held == value) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
void HostLocInfo::SetupScratchLocation() {
|
||||
ASSERT(IsCompletelyEmpty());
|
||||
realized = true;
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsCompletelyEmpty() const {
|
||||
return values.empty() && !locked && !realized && !accumulated_uses && !expected_uses && !uses_this_inst;
|
||||
}
|
||||
|
||||
void HostLocInfo::UpdateUses() {
|
||||
accumulated_uses += uses_this_inst;
|
||||
uses_this_inst = 0;
|
||||
|
||||
if (accumulated_uses == expected_uses) {
|
||||
values.clear();
|
||||
accumulated_uses = 0;
|
||||
expected_uses = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the Argument array for `inst` and records one pending use on the host
// location of every argument that is a non-immediate value of a non-valueless type.
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
    ArgumentInfo arguments = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};

    for (size_t i = 0; i < inst->NumArgs(); i++) {
        const IR::Value arg = inst->GetArg(i);
        arguments[i].value = arg;

        // Immediates and valueless types do not live in any host location.
        if (arg.IsImmediate() || IsValuelessType(arg.GetType())) {
            continue;
        }

        ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
        ValueInfo(arg.GetInst()).uses_this_inst++;
    }

    return arguments;
}
|
||||
|
||||
bool RegAlloc::IsValueLive(IR::Inst* inst) const {
|
||||
return !!ValueLocation(inst);
|
||||
}
|
||||
|
||||
void RegAlloc::UpdateAllUses() {
|
||||
for (auto& gpr : gprs) {
|
||||
gpr.UpdateUses();
|
||||
}
|
||||
for (auto& fpr : fprs) {
|
||||
fpr.UpdateUses();
|
||||
}
|
||||
for (auto& spill : spills) {
|
||||
spill.UpdateUses();
|
||||
}
|
||||
}
|
||||
|
||||
// Defines `inst` as an alias of `arg` instead of allocating a new location for it.
// If `arg` is an immediate, the uses of `inst` are replaced with that immediate;
// otherwise `inst` is added to the location that already holds `arg`'s value.
void RegAlloc::DefineAsExisting(IR::Inst* inst, Argument& arg) {
    ASSERT(!ValueLocation(inst));

    if (!arg.value.IsImmediate()) {
        HostLocInfo& holder = ValueInfo(arg.value.GetInst());
        holder.values.emplace_back(inst);
        holder.expected_uses += inst->UseCount();
        return;
    }

    inst->ReplaceUsesWith(arg.value);
}
|
||||
|
||||
void RegAlloc::AssertNoMoreUses() const {
|
||||
const auto is_empty = [](const auto& i) { return i.IsCompletelyEmpty(); };
|
||||
ASSERT(std::all_of(gprs.begin(), gprs.end(), is_empty));
|
||||
ASSERT(std::all_of(fprs.begin(), fprs.end(), is_empty));
|
||||
ASSERT(std::all_of(spills.begin(), spills.end(), is_empty));
|
||||
}
|
||||
|
||||
template<HostLoc::Kind kind>
|
||||
u32 RegAlloc::GenerateImmediate(const IR::Value& value) {
|
||||
// TODO
|
||||
// ASSERT(value.GetType() != IR::Type::U1);
|
||||
|
||||
if constexpr (kind == HostLoc::Kind::Gpr) {
|
||||
const u32 new_location_index = AllocateRegister(gprs, gpr_order);
|
||||
SpillGpr(new_location_index);
|
||||
gprs[new_location_index].SetupScratchLocation();
|
||||
|
||||
as.LI(biscuit::GPR{new_location_index}, value.GetImmediateAsU64());
|
||||
|
||||
return new_location_index;
|
||||
} else if constexpr (kind == HostLoc::Kind::Fpr) {
|
||||
UNIMPLEMENTED();
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<kind>>);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<HostLoc::Kind required_kind>
|
||||
u32 RegAlloc::RealizeReadImpl(const IR::Value& value) {
|
||||
if (value.IsImmediate()) {
|
||||
return GenerateImmediate<required_kind>(value);
|
||||
}
|
||||
|
||||
const auto current_location = ValueLocation(value.GetInst());
|
||||
ASSERT(current_location);
|
||||
|
||||
if (current_location->kind == required_kind) {
|
||||
ValueInfo(*current_location).realized = true;
|
||||
return current_location->index;
|
||||
}
|
||||
|
||||
ASSERT(!ValueInfo(*current_location).realized);
|
||||
ASSERT(!ValueInfo(*current_location).locked);
|
||||
|
||||
if constexpr (required_kind == HostLoc::Kind::Gpr) {
|
||||
const u32 new_location_index = AllocateRegister(gprs, gpr_order);
|
||||
SpillGpr(new_location_index);
|
||||
|
||||
switch (current_location->kind) {
|
||||
case HostLoc::Kind::Gpr:
|
||||
ASSERT_FALSE("Logic error");
|
||||
break;
|
||||
case HostLoc::Kind::Fpr:
|
||||
as.FMV_X_D(biscuit::GPR(new_location_index), biscuit::FPR{current_location->index});
|
||||
// ASSERT size fits
|
||||
break;
|
||||
case HostLoc::Kind::Spill:
|
||||
as.LD(biscuit::GPR{new_location_index}, spill_offset + current_location->index * spill_slot_size, biscuit::sp);
|
||||
break;
|
||||
}
|
||||
|
||||
gprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
|
||||
gprs[new_location_index].realized = true;
|
||||
return new_location_index;
|
||||
} else if constexpr (required_kind == HostLoc::Kind::Fpr) {
|
||||
const u32 new_location_index = AllocateRegister(fprs, fpr_order);
|
||||
SpillFpr(new_location_index);
|
||||
|
||||
switch (current_location->kind) {
|
||||
case HostLoc::Kind::Gpr:
|
||||
as.FMV_D_X(biscuit::FPR{new_location_index}, biscuit::GPR(current_location->index));
|
||||
break;
|
||||
case HostLoc::Kind::Fpr:
|
||||
ASSERT_FALSE("Logic error");
|
||||
break;
|
||||
case HostLoc::Kind::Spill:
|
||||
as.FLD(biscuit::FPR{new_location_index}, spill_offset + current_location->index * spill_slot_size, biscuit::sp);
|
||||
break;
|
||||
}
|
||||
|
||||
fprs[new_location_index] = std::exchange(ValueInfo(*current_location), {});
|
||||
fprs[new_location_index].realized = true;
|
||||
return new_location_index;
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<required_kind>>);
|
||||
}
|
||||
}
|
||||
|
||||
template<HostLoc::Kind required_kind>
|
||||
u32 RegAlloc::RealizeWriteImpl(const IR::Inst* value) {
|
||||
ASSERT(!ValueLocation(value));
|
||||
|
||||
const auto setup_location = [&](HostLocInfo& info) {
|
||||
info = {};
|
||||
info.values.emplace_back(value);
|
||||
info.locked = true;
|
||||
info.realized = true;
|
||||
info.expected_uses = value->UseCount();
|
||||
};
|
||||
|
||||
if constexpr (required_kind == HostLoc::Kind::Gpr) {
|
||||
const u32 new_location_index = AllocateRegister(gprs, gpr_order);
|
||||
SpillGpr(new_location_index);
|
||||
setup_location(gprs[new_location_index]);
|
||||
return new_location_index;
|
||||
} else if constexpr (required_kind == HostLoc::Kind::Fpr) {
|
||||
const u32 new_location_index = AllocateRegister(fprs, fpr_order);
|
||||
SpillFpr(new_location_index);
|
||||
setup_location(fprs[new_location_index]);
|
||||
return new_location_index;
|
||||
} else {
|
||||
static_assert(Common::always_false_v<mcl::mp::lift_value<required_kind>>);
|
||||
}
|
||||
}
|
||||
|
||||
template u32 RegAlloc::RealizeReadImpl<HostLoc::Kind::Gpr>(const IR::Value& value);
|
||||
template u32 RegAlloc::RealizeReadImpl<HostLoc::Kind::Fpr>(const IR::Value& value);
|
||||
template u32 RegAlloc::RealizeWriteImpl<HostLoc::Kind::Gpr>(const IR::Inst* value);
|
||||
template u32 RegAlloc::RealizeWriteImpl<HostLoc::Kind::Fpr>(const IR::Inst* value);
|
||||
|
||||
// Chooses a register index from `order` for a new value.
//
// The first register in `order` that holds no values and is not locked is preferred.
// If there is none, a random unlocked register is chosen; the caller is expected to
// spill its contents before reusing it.
//
// regs:  bookkeeping entries for the register file being allocated from.
// order: register indices to consider, in order of preference.
// Returns the chosen register index. Asserts if every register in `order` is locked.
u32 RegAlloc::AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<u32>& order) const {
    const auto empty = std::find_if(order.begin(), order.end(), [&](u32 i) { return regs[i].values.empty() && !regs[i].locked; });
    if (empty != order.end()) {
        return *empty;
    }

    std::vector<u32> candidates;
    std::copy_if(order.begin(), order.end(), std::back_inserter(candidates), [&](u32 i) { return !regs[i].locked; });

    // With no candidates, `candidates.size() - 1` would wrap around and the lookup below
    // would index an empty vector, so fail loudly instead.
    ASSERT_MSG(!candidates.empty(), "All candidate registers are locked");

    // TODO: LRU
    std::uniform_int_distribution<size_t> dis{0, candidates.size() - 1};
    return candidates[dis(rand_gen)];
}
|
||||
|
||||
// Evicts the contents of GPR `index` to a free spill slot. Does nothing if the
// register holds no values. The register must be neither locked nor realized.
void RegAlloc::SpillGpr(u32 index) {
    ASSERT(!gprs[index].locked && !gprs[index].realized);

    HostLocInfo& victim = gprs[index];
    if (victim.values.empty()) {
        return;
    }

    const u32 slot = FindFreeSpill();
    as.SD(biscuit::GPR{index}, spill_offset + slot * spill_slot_size, biscuit::sp);
    // Transfer the bookkeeping to the spill slot and leave the register entry empty.
    spills[slot] = std::exchange(victim, {});
}
|
||||
|
||||
// Evicts the contents of FPR `index` to a free spill slot. Does nothing if the
// register holds no values. The register must be neither locked nor realized.
void RegAlloc::SpillFpr(u32 index) {
    ASSERT(!fprs[index].locked && !fprs[index].realized);

    HostLocInfo& victim = fprs[index];
    if (victim.values.empty()) {
        return;
    }

    const u32 slot = FindFreeSpill();
    as.FSD(biscuit::FPR{index}, spill_offset + slot * spill_slot_size, biscuit::sp);
    // Transfer the bookkeeping to the spill slot and leave the register entry empty.
    spills[slot] = std::exchange(victim, {});
}
|
||||
|
||||
// Returns the index of the first spill slot that holds no values.
// Asserts if every spill slot is occupied.
u32 RegAlloc::FindFreeSpill() const {
    const auto is_unoccupied = [](const HostLocInfo& info) { return info.values.empty(); };
    const auto iter = std::find_if(spills.begin(), spills.end(), is_unoccupied);
    ASSERT_MSG(iter != spills.end(), "All spill locations are full");
    return static_cast<u32>(iter - spills.begin());
}
|
||||
|
||||
// Finds where `value` currently lives. GPRs are searched first, then FPRs, then
// spill slots. Returns std::nullopt if no location holds `value`.
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
    // Index of the first entry in `locations` that contains `value`, if any.
    const auto index_of_holder = [value](const auto& locations) -> std::optional<u32> {
        for (size_t i = 0; i < locations.size(); ++i) {
            if (locations[i].Contains(value)) {
                return static_cast<u32>(i);
            }
        }
        return std::nullopt;
    };

    if (const auto index = index_of_holder(gprs)) {
        return HostLoc{HostLoc::Kind::Gpr, *index};
    }
    if (const auto index = index_of_holder(fprs)) {
        return HostLoc{HostLoc::Kind::Fpr, *index};
    }
    if (const auto index = index_of_holder(spills)) {
        return HostLoc{HostLoc::Kind::Spill, *index};
    }
    return std::nullopt;
}
|
||||
|
||||
// Returns the bookkeeping entry for the given host location.
// Asserts if `host_loc.kind` is not one of the known kinds.
HostLocInfo& RegAlloc::ValueInfo(HostLoc host_loc) {
    const auto index = static_cast<size_t>(host_loc.index);

    if (host_loc.kind == HostLoc::Kind::Gpr) {
        return gprs[index];
    }
    if (host_loc.kind == HostLoc::Kind::Fpr) {
        return fprs[index];
    }
    if (host_loc.kind == HostLoc::Kind::Spill) {
        return spills[index];
    }
    ASSERT_FALSE("RegAlloc::ValueInfo: Invalid HostLoc::Kind");
}
|
||||
|
||||
// Returns the bookkeeping entry of the location (GPR, FPR or spill slot, searched in
// that order) that currently holds `value`. Asserts if no location holds it.
HostLocInfo& RegAlloc::ValueInfo(const IR::Inst* value) {
    const auto contains_value = [value](const HostLocInfo& info) {
        return info.Contains(value);
    };

    // Each search result must be compared against the end() of the container that was
    // actually searched; comparing against another container's end() never detects a
    // miss and leads to dereferencing a past-the-end iterator.
    if (const auto iter = std::find_if(gprs.begin(), gprs.end(), contains_value); iter != gprs.end()) {
        return *iter;
    }
    if (const auto iter = std::find_if(fprs.begin(), fprs.end(), contains_value); iter != fprs.end()) {
        return *iter;
    }
    if (const auto iter = std::find_if(spills.begin(), spills.end(), contains_value); iter != spills.end()) {
        return *iter;
    }
    ASSERT_FALSE("RegAlloc::ValueInfo: Value not found");
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
191
src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.h
Normal file
191
src/dynarmic/src/dynarmic/backend/riscv64/reg_alloc.h
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <random>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <biscuit/assembler.hpp>
|
||||
#include <biscuit/registers.hpp>
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <mcl/type_traits/is_instance_of_template.hpp>
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/backend/riscv64/stack_layout.h"
|
||||
#include "dynarmic/ir/cond.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/value.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
class RegAlloc;
|
||||
|
||||
struct HostLoc {
|
||||
enum class Kind {
|
||||
Gpr,
|
||||
Fpr,
|
||||
Spill,
|
||||
} kind;
|
||||
u32 index;
|
||||
};
|
||||
|
||||
struct Argument {
|
||||
public:
|
||||
using copyable_reference = std::reference_wrapper<Argument>;
|
||||
|
||||
IR::Type GetType() const;
|
||||
bool IsImmediate() const;
|
||||
|
||||
bool GetImmediateU1() const;
|
||||
u8 GetImmediateU8() const;
|
||||
u16 GetImmediateU16() const;
|
||||
u32 GetImmediateU32() const;
|
||||
u64 GetImmediateU64() const;
|
||||
IR::Cond GetImmediateCond() const;
|
||||
IR::AccType GetImmediateAccType() const;
|
||||
|
||||
private:
|
||||
friend class RegAlloc;
|
||||
explicit Argument(RegAlloc& reg_alloc)
|
||||
: reg_alloc{reg_alloc} {}
|
||||
|
||||
bool allocated = false;
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Value value;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct RAReg {
|
||||
public:
|
||||
static constexpr HostLoc::Kind kind = std::is_base_of_v<biscuit::FPR, T>
|
||||
? HostLoc::Kind::Fpr
|
||||
: HostLoc::Kind::Gpr;
|
||||
|
||||
operator T() const { return *reg; }
|
||||
|
||||
T operator*() const { return *reg; }
|
||||
|
||||
const T* operator->() const { return &*reg; }
|
||||
|
||||
~RAReg();
|
||||
|
||||
private:
|
||||
friend class RegAlloc;
|
||||
explicit RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value);
|
||||
|
||||
void Realize();
|
||||
|
||||
RegAlloc& reg_alloc;
|
||||
bool write;
|
||||
const IR::Value value;
|
||||
std::optional<T> reg;
|
||||
};
|
||||
|
||||
struct HostLocInfo final {
|
||||
std::vector<const IR::Inst*> values;
|
||||
size_t locked = 0;
|
||||
bool realized = false;
|
||||
size_t uses_this_inst = 0;
|
||||
size_t accumulated_uses = 0;
|
||||
size_t expected_uses = 0;
|
||||
|
||||
bool Contains(const IR::Inst*) const;
|
||||
void SetupScratchLocation();
|
||||
bool IsCompletelyEmpty() const;
|
||||
void UpdateUses();
|
||||
};
|
||||
|
||||
class RegAlloc {
|
||||
public:
|
||||
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
|
||||
|
||||
explicit RegAlloc(biscuit::Assembler& as, std::vector<u32> gpr_order, std::vector<u32> fpr_order)
|
||||
: as{as}, gpr_order{gpr_order}, fpr_order{fpr_order}, rand_gen{std::random_device{}()} {}
|
||||
|
||||
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
|
||||
bool IsValueLive(IR::Inst* inst) const;
|
||||
|
||||
auto ReadX(Argument& arg) { return RAReg<biscuit::GPR>{*this, false, arg.value}; }
|
||||
auto ReadD(Argument& arg) { return RAReg<biscuit::FPR>{*this, false, arg.value}; }
|
||||
|
||||
auto WriteX(IR::Inst* inst) { return RAReg<biscuit::GPR>{*this, true, IR::Value{inst}}; }
|
||||
auto WriteD(IR::Inst* inst) { return RAReg<biscuit::FPR>{*this, true, IR::Value{inst}}; }
|
||||
|
||||
void DefineAsExisting(IR::Inst* inst, Argument& arg);
|
||||
|
||||
void SpillAll();
|
||||
|
||||
template<typename... Ts>
|
||||
static void Realize(Ts&... rs) {
|
||||
static_assert((mcl::is_instance_of_template<RAReg, Ts>() && ...));
|
||||
(rs.Realize(), ...);
|
||||
}
|
||||
|
||||
void UpdateAllUses();
|
||||
void AssertNoMoreUses() const;
|
||||
|
||||
private:
|
||||
template<typename>
|
||||
friend struct RAReg;
|
||||
|
||||
template<HostLoc::Kind kind>
|
||||
u32 GenerateImmediate(const IR::Value& value);
|
||||
template<HostLoc::Kind kind>
|
||||
u32 RealizeReadImpl(const IR::Value& value);
|
||||
template<HostLoc::Kind kind>
|
||||
u32 RealizeWriteImpl(const IR::Inst* value);
|
||||
|
||||
u32 AllocateRegister(const std::array<HostLocInfo, 32>& regs, const std::vector<u32>& order) const;
|
||||
void SpillGpr(u32 index);
|
||||
void SpillFpr(u32 index);
|
||||
u32 FindFreeSpill() const;
|
||||
|
||||
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
|
||||
HostLocInfo& ValueInfo(HostLoc host_loc);
|
||||
HostLocInfo& ValueInfo(const IR::Inst* value);
|
||||
|
||||
biscuit::Assembler& as;
|
||||
std::vector<u32> gpr_order;
|
||||
std::vector<u32> fpr_order;
|
||||
|
||||
std::array<HostLocInfo, 32> gprs;
|
||||
std::array<HostLocInfo, 32> fprs;
|
||||
std::array<HostLocInfo, SpillCount> spills;
|
||||
|
||||
mutable std::mt19937 rand_gen;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
RAReg<T>::RAReg(RegAlloc& reg_alloc, bool write, const IR::Value& value)
|
||||
: reg_alloc{reg_alloc}, write{write}, value{value} {
|
||||
if (!write && !value.IsImmediate()) {
|
||||
reg_alloc.ValueInfo(value.GetInst()).locked++;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
RAReg<T>::~RAReg() {
|
||||
if (!value.IsImmediate()) {
|
||||
reg_alloc.ValueInfo(value.GetInst()).locked--;
|
||||
}
|
||||
if (reg) {
|
||||
reg_alloc.ValueInfo(HostLoc{kind, reg->Index()}).realized = false;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void RAReg<T>::Realize() {
|
||||
reg = T{write ? reg_alloc.RealizeWriteImpl<kind>(value.GetInst()) : reg_alloc.RealizeReadImpl<kind>(value)};
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
33
src/dynarmic/src/dynarmic/backend/riscv64/stack_layout.h
Normal file
33
src/dynarmic/src/dynarmic/backend/riscv64/stack_layout.h
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2024 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
constexpr size_t SpillCount = 64;
|
||||
|
||||
struct alignas(16) StackLayout {
|
||||
s64 cycles_remaining;
|
||||
s64 cycles_to_run;
|
||||
|
||||
std::array<u64, SpillCount> spill;
|
||||
|
||||
u32 save_host_fpcr;
|
||||
u32 save_host_fpsr;
|
||||
|
||||
bool check_bit;
|
||||
};
|
||||
|
||||
static_assert(sizeof(StackLayout) % 16 == 0);
|
||||
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
1299
src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
Normal file
1299
src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
Normal file
File diff suppressed because it is too large
Load diff
146
src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h
Normal file
146
src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/backend/block_range_information.h"
|
||||
#include "dynarmic/backend/x64/a32_jitstate.h"
|
||||
#include "dynarmic/backend/x64/emit_x64.h"
|
||||
#include "dynarmic/backend/x64/reg_alloc.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/ir/terminal.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
class RegAlloc;
|
||||
|
||||
struct A32EmitContext final : public EmitContext {
|
||||
A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
|
||||
|
||||
A32::LocationDescriptor Location() const;
|
||||
A32::LocationDescriptor EndLocation() const;
|
||||
bool IsSingleStep() const;
|
||||
FP::FPCR FPCR(bool fpcr_controlled = true) const override;
|
||||
|
||||
bool HasOptimization(OptimizationFlag flag) const override {
|
||||
return conf.HasOptimization(flag);
|
||||
}
|
||||
|
||||
const A32::UserConfig& conf;
|
||||
};
|
||||
|
||||
class A32EmitX64 final : public EmitX64 {
|
||||
public:
|
||||
A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_interface);
|
||||
~A32EmitX64() override;
|
||||
|
||||
/**
|
||||
* Emit host machine code for a basic block with intermediate representation `block`.
|
||||
* @note block is modified.
|
||||
*/
|
||||
BlockDescriptor Emit(IR::Block& block);
|
||||
|
||||
void ClearCache() override;
|
||||
|
||||
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
|
||||
|
||||
protected:
|
||||
void EmitCondPrelude(const A32EmitContext& ctx);
|
||||
|
||||
struct FastDispatchEntry {
|
||||
u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
|
||||
const void* code_ptr = nullptr;
|
||||
};
|
||||
static_assert(sizeof(FastDispatchEntry) == 0x10);
|
||||
static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
|
||||
static constexpr size_t fast_dispatch_table_size = 0x10000;
|
||||
void ClearFastDispatchTable();
|
||||
void GenFastmemFallbacks();
|
||||
void GenTerminalHandlers();
|
||||
|
||||
// Microinstruction emitters
|
||||
#define OPCODE(...)
|
||||
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
|
||||
#define A64OPC(...)
|
||||
#include "dynarmic/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
|
||||
// Helpers
|
||||
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
|
||||
|
||||
// Fastmem information
|
||||
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, unsigned>;
|
||||
struct FastmemPatchInfo {
|
||||
u64 resume_rip;
|
||||
u64 callback;
|
||||
DoNotFastmemMarker marker;
|
||||
bool recompile;
|
||||
};
|
||||
std::optional<DoNotFastmemMarker> ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const;
|
||||
FakeCall FastmemCallback(u64 rip);
|
||||
|
||||
// Memory access helpers
|
||||
void EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end = nullptr);
|
||||
template<std::size_t bitsize, auto callback>
|
||||
void EmitMemoryRead(A32EmitContext& ctx, IR::Inst* inst);
|
||||
template<std::size_t bitsize, auto callback>
|
||||
void EmitMemoryWrite(A32EmitContext& ctx, IR::Inst* inst);
|
||||
template<std::size_t bitsize, auto callback>
|
||||
void EmitExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst);
|
||||
template<std::size_t bitsize, auto callback>
|
||||
void EmitExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst);
|
||||
template<std::size_t bitsize, auto callback>
|
||||
void EmitExclusiveReadMemoryInline(A32EmitContext& ctx, IR::Inst* inst);
|
||||
template<std::size_t bitsize, auto callback>
|
||||
void EmitExclusiveWriteMemoryInline(A32EmitContext& ctx, IR::Inst* inst);
|
||||
|
||||
// Terminal instruction emitters
|
||||
void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
|
||||
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
|
||||
|
||||
// Patching
|
||||
void Unpatch(const IR::LocationDescriptor& target_desc) override;
|
||||
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
|
||||
void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
|
||||
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
|
||||
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
|
||||
|
||||
const A32::UserConfig conf;
|
||||
RegAlloc reg_alloc; //reusable reg alloc
|
||||
BlockRangeInformation<u32> block_ranges;
|
||||
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
|
||||
ankerl::unordered_dense::map<u64, FastmemPatchInfo> fastmem_patch_info;
|
||||
std::set<DoNotFastmemMarker> do_not_fastmem;
|
||||
std::map<std::tuple<bool, size_t, int, int>, void (*)()> read_fallbacks;
|
||||
std::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
|
||||
std::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
|
||||
void (*memory_read_128)() = nullptr; // Dummy
|
||||
void (*memory_write_128)() = nullptr; // Dummy
|
||||
const void* terminal_handler_pop_rsb_hint;
|
||||
const void* terminal_handler_fast_dispatch_hint = nullptr;
|
||||
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
|
||||
A32::Jit* jit_interface;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue