diff --git a/.ci/ios/build.sh b/.ci/ios/build.sh new file mode 100755 index 0000000000..f0d6147993 --- /dev/null +++ b/.ci/ios/build.sh @@ -0,0 +1,19 @@ +#!/bin/sh -ex + +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + +WORK_DIR="$PWD" +xcrun --sdk iphoneos --show-sdk-path + +# TODO: support iphonesimulator sdk + +cmake -G Xcode -B build/ios \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=16.0 \ + -DCMAKE_OSX_SYSROOT=iphoneos \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_ARCHITECTURES="arm64" \ + -DCMAKE_BUILD_TYPE=Release \ + "$@" + +cmake --build build/ios -t eden-ios --config Release diff --git a/.ci/license-header.sh b/.ci/license-header.sh index 6b19f91185..b674234c7c 100755 --- a/.ci/license-header.sh +++ b/.ci/license-header.sh @@ -115,7 +115,7 @@ for file in $FILES; do *.cmake|*.sh|*CMakeLists.txt) begin="#" ;; - *.kt*|*.cpp|*.h|*.qml) + *.kt*|*.cpp|*.h|*.qml|*.swift|*.mm) begin="//" ;; *) diff --git a/.patch/boost/0002-ios-fix.patch b/.patch/boost/0002-ios-fix.patch new file mode 100644 index 0000000000..4f5d495154 --- /dev/null +++ b/.patch/boost/0002-ios-fix.patch @@ -0,0 +1,31 @@ +diff --git a/libs/process/src/shell.cpp b/libs/process/src/shell.cpp +index bf4bbfd8..bc4aae89 100644 +--- a/libs/process/src/shell.cpp ++++ b/libs/process/src/shell.cpp +@@ -19,7 +19,7 @@ + #if defined(BOOST_PROCESS_V2_WINDOWS) + #include + #include +-#elif !defined(__OpenBSD__) && !defined(__ANDROID__) ++#elif !defined(__OpenBSD__) && !defined(__ANDROID__) && !(defined(__APPLE__) && TARGET_OS_IPHONE) + #include + #endif + +@@ -30,7 +30,7 @@ BOOST_PROCESS_V2_DECL const error_category& get_shell_category() + { + return system_category(); + } +-#elif !defined(__OpenBSD__) && !defined(__ANDROID__) ++#elif !defined(__OpenBSD__) && !defined(__ANDROID__) && !(defined(__APPLE__) && TARGET_OS_IPHONE) + + struct shell_category_t final : public error_category + { +@@ -99,7 +99,7 @@ auto shell::args() const-> args_type + return 
input_.c_str(); + } + +-#elif !defined(__OpenBSD__) && !defined(__ANDROID__) ++#elif !defined(__OpenBSD__) && !defined(__ANDROID__) && !(defined(__APPLE__) && TARGET_OS_IPHONE) + + void shell::parse_() + { diff --git a/.patch/spirv-tools/0003-ios-fix.patch b/.patch/spirv-tools/0003-ios-fix.patch new file mode 100644 index 0000000000..7d5710ef68 --- /dev/null +++ b/.patch/spirv-tools/0003-ios-fix.patch @@ -0,0 +1,33 @@ +diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt +index 7ab2319..333e325 100644 +--- a/source/CMakeLists.txt ++++ b/source/CMakeLists.txt +@@ -151,9 +151,11 @@ add_custom_command(OUTPUT ${SPIRV_TOOLS_BUILD_VERSION_INC} + COMMENT "Update build-version.inc in the SPIRV-Tools build directory (if necessary).") + # Convenience target for standalone generation of the build-version.inc file. + # This is not required for any dependence chain. +-add_custom_target(spirv-tools-build-version +- DEPENDS ${SPIRV_TOOLS_BUILD_VERSION_INC}) +-set_property(TARGET spirv-tools-build-version PROPERTY FOLDER "SPIRV-Tools build") ++if (NOT IOS) ++ add_custom_target(spirv-tools-build-version ++ DEPENDS ${SPIRV_TOOLS_BUILD_VERSION_INC}) ++ set_property(TARGET spirv-tools-build-version PROPERTY FOLDER "SPIRV-Tools build") ++endif() + + list(APPEND PCH_DEPENDS + ${CORE_TABLES_HEADER_INC_FILE} +@@ -338,8 +340,11 @@ function(spirv_tools_default_target_options target) + ) + set_property(TARGET ${target} PROPERTY FOLDER "SPIRV-Tools libraries") + spvtools_check_symbol_exports(${target}) +- add_dependencies(${target} +- spirv-tools-build-version core_tables extinst_tables) ++ if (IOS) ++ add_dependencies(${target} core_tables extinst_tables) ++ else () ++ add_dependencies(${target} spirv-tools-build-version core_tables extinst_tables) ++ endif() + endfunction() + + if (SPIRV_TOOLS_BUILD_SHARED) diff --git a/CMakeLists.txt b/CMakeLists.txt index 42717c496d..4afadff0a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,29 @@ include(CMakeDependentOption) 
include(CTest) include(CPMUtil) +# TODO(crueter): Make this more automatic. +if (IOS) + list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") + list(APPEND CMAKE_PROGRAM_PATH "/opt/homebrew/bin" CACHE INTERNAL "") + + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH CACHE INTERNAL "") + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH CACHE INTERNAL "") + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH CACHE INTERNAL "") + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH CACHE INTERNAL "") + + list(LENGTH CMAKE_OSX_ARCHITECTURES _arch_len) + if (NOT _arch_len EQUAL 1) + message(FATAL_ERROR "CMAKE_OSX_ARCHITECTURES must contain exactly one architecture.") + endif() + + # TODO(crueter): Proper handling for this. + if (CMAKE_OSX_ARCHITECTURES STREQUAL arm64) + set(CMAKE_SYSTEM_PROCESSOR aarch64) + else() + set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_OSX_ARCHITECTURES}) + endif() +endif() + if (NOT DEFINED ARCHITECTURE) message(FATAL_ERROR "Architecture didn't make it out of scope, did you delete DetectArchitecture.cmake?") endif() @@ -42,7 +65,7 @@ if (PLATFORM_NETBSD) set(ENV{PKG_CONFIG_PATH} "${PKG_CONFIG_PATH}:${CMAKE_SYSROOT}/usr/pkg/lib/ffmpeg7/pkgconfig") endif() -cmake_dependent_option(YUZU_STATIC_ROOM "Build a static room executable only (CI only)" OFF "PLATFORM_LINUX" OFF) +cmake_dependent_option(YUZU_STATIC_ROOM "Build a static room executable only (CI only)" OFF "PLATFORM_LINUX OR WIN32 OR (APPLE AND NOT IOS)" OFF) if (YUZU_STATIC_ROOM) set(YUZU_ROOM ON) set(YUZU_ROOM_STANDALONE ON) @@ -67,9 +90,15 @@ if (YUZU_STATIC_ROOM) endif() # qt stuff -option(ENABLE_QT "Enable the Qt frontend" ON) +if (IOS OR ANDROID) + set(_default_qt OFF) +else() + set(_default_qt ON) +endif() + +option(ENABLE_QT "Enable the Qt frontend" ${_default_qt}) option(ENABLE_QT_TRANSLATION "Enable translations for the Qt frontend" OFF) -option(ENABLE_UPDATE_CHECKER "Enable update checker (for Qt and Android)" OFF) +cmake_dependent_option(ENABLE_UPDATE_CHECKER "Enable update checker (for Qt and 
Android)" OFF "ENABLE_QT OR ANDROID" OFF) cmake_dependent_option(YUZU_USE_QT_MULTIMEDIA "Use QtMultimedia for Camera" OFF "NOT YUZU_USE_BUNDLED_QT" OFF) cmake_dependent_option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF "NOT YUZU_USE_BUNDLED_QT" OFF) set(YUZU_QT_MIRROR "" CACHE STRING "What mirror to use for downloading the bundled Qt libraries") @@ -170,31 +199,32 @@ if (MSVC AND NOT CXX_CLANG) set(CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT} /W3 /WX-") endif() -# TODO(crueter): Cleanup, each dep that has a bundled option should allow to choose between bundled, external, system -cmake_dependent_option(YUZU_USE_EXTERNAL_SDL2 "Build SDL2 from external source" OFF "NOT MSVC;NOT ANDROID" OFF) -cmake_dependent_option(YUZU_USE_BUNDLED_SDL2 "Download bundled SDL2 build" "${MSVC}" "NOT ANDROID" OFF) - -option(ENABLE_CUBEB "Enables the cubeb audio backend" ON) - set(EXT_DEFAULT OFF) -if (MSVC OR ANDROID) +if (MSVC OR ANDROID OR IOS) set(EXT_DEFAULT ON) endif() +# TODO(crueter): Cleanup, each dep that has a bundled option should allow to choose between bundled, external, system +cmake_dependent_option(YUZU_USE_EXTERNAL_SDL2 "Build SDL2 from external source" OFF "NOT MSVC;NOT ANDROID" OFF) +cmake_dependent_option(YUZU_USE_BUNDLED_SDL2 "Download bundled SDL2 build" "${EXT_DEFAULT}" "NOT ANDROID" OFF) + +# TODO(crueter): did not find header 'AudioHardware.h' in framework 'CoreAudio' +cmake_dependent_option(ENABLE_CUBEB "Enables the cubeb audio backend" ON "NOT IOS" OFF) + # ffmpeg option(YUZU_USE_BUNDLED_FFMPEG "Download bundled FFmpeg" ${EXT_DEFAULT}) cmake_dependent_option(YUZU_USE_EXTERNAL_FFMPEG "Build FFmpeg from external source" "${PLATFORM_SUN}" "NOT WIN32 AND NOT ANDROID" OFF) # sirit set(BUNDLED_SIRIT_DEFAULT OFF) -if (MSVC AND NOT (CMAKE_BUILD_TYPE MATCHES "Deb") OR ANDROID) +if ((MSVC AND NOT (CMAKE_BUILD_TYPE MATCHES "Deb")) OR ANDROID OR IOS) set(BUNDLED_SIRIT_DEFAULT ON) endif() option(YUZU_USE_BUNDLED_SIRIT "Download bundled 
sirit" ${BUNDLED_SIRIT_DEFAULT}) # FreeBSD 15+ has libusb, versions below should disable it -cmake_dependent_option(ENABLE_LIBUSB "Enable the use of LibUSB" ON "WIN32 OR PLATFORM_LINUX OR PLATFORM_FREEBSD OR APPLE" OFF) +cmake_dependent_option(ENABLE_LIBUSB "Enable the use of LibUSB" ON "WIN32 OR PLATFORM_LINUX OR PLATFORM_FREEBSD OR (APPLE AND NOT IOS)" OFF) cmake_dependent_option(ENABLE_OPENGL "Enable OpenGL" ON "NOT (WIN32 AND ARCHITECTURE_arm64) AND NOT APPLE" OFF) mark_as_advanced(FORCE ENABLE_OPENGL) @@ -212,10 +242,10 @@ option(YUZU_LEGACY "Apply patches that improve compatibility with older GPUs (e. option(NIGHTLY_BUILD "Use Nightly qualifiers in the update checker and build metadata" OFF) -cmake_dependent_option(YUZU_ROOM "Enable dedicated room functionality" ON "NOT ANDROID" OFF) +cmake_dependent_option(YUZU_ROOM "Enable dedicated room functionality" ON "NOT ANDROID AND NOT IOS" OFF) cmake_dependent_option(YUZU_ROOM_STANDALONE "Enable standalone room executable" ON "YUZU_ROOM" OFF) -cmake_dependent_option(YUZU_CMD "Compile the eden-cli executable" ON "NOT ANDROID" OFF) +cmake_dependent_option(YUZU_CMD "Compile the eden-cli executable" ON "NOT ANDROID AND NOT IOS" OFF) cmake_dependent_option(YUZU_CRASH_DUMPS "Compile crash dump (Minidump) support" OFF "WIN32 OR PLATFORM_LINUX" OFF) @@ -283,7 +313,7 @@ if (YUZU_ROOM) add_compile_definitions(YUZU_ROOM) endif() -if ((ANDROID OR APPLE OR UNIX) AND (NOT PLATFORM_LINUX OR ANDROID) AND NOT WIN32) +if ((ANDROID OR APPLE OR UNIX OR IOS) AND (NOT PLATFORM_LINUX OR ANDROID) AND NOT WIN32) if(CXX_APPLE OR CXX_CLANG) # libc++ has stop_token and jthread as experimental set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-library") @@ -359,7 +389,10 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(RenderDoc MODULE) +find_package(RenderDoc MODULE QUIET) +if (NOT RenderDoc_FOUND) + message(WARNING "RenderDoc not found. 
Some debugging features may be disabled.") +endif() # openssl funniness if (YUZU_USE_BUNDLED_OPENSSL) @@ -484,9 +517,15 @@ endfunction() # Platform-specific library requirements # Put these BEFORE EXTERNALS or Boost WILL die # ============================================= - if (APPLE) - foreach(fw Carbon Metal Cocoa IOKit CoreVideo CoreMedia) + set(_libs Metal IOKit CoreVideo CoreMedia) + if (IOS) + list(APPEND _libs objc) + else() + list(APPEND _libs Carbon Cocoa) + endif() + + foreach(fw ${_libs}) find_library(${fw}_LIBRARY ${fw} REQUIRED) list(APPEND PLATFORM_LIBRARIES ${${fw}_LIBRARY}) endforeach() diff --git a/CMakeModules/CPMUtil.cmake b/CMakeModules/CPMUtil.cmake index b992f24083..ca39a50331 100644 --- a/CMakeModules/CPMUtil.cmake +++ b/CMakeModules/CPMUtil.cmake @@ -3,7 +3,7 @@ set(CPM_SOURCE_CACHE "${PROJECT_SOURCE_DIR}/.cache/cpm" CACHE STRING "" FORCE) -if(MSVC OR ANDROID) +if(MSVC OR ANDROID OR IOS) set(BUNDLED_DEFAULT ON) else() set(BUNDLED_DEFAULT OFF) @@ -690,8 +690,10 @@ function(AddCIPackage) set(pkgname linux-amd64) elseif(PLATFORM_LINUX AND ARCHITECTURE_arm64) set(pkgname linux-aarch64) - elseif(APPLE) + elseif(APPLE AND NOT IOS) set(pkgname macos-universal) + elseif(IOS AND ARCHITECTURE_arm64) + set(pkgname ios-aarch64) endif() if (DEFINED pkgname AND NOT "${pkgname}" IN_LIST DISABLED_PLATFORMS) diff --git a/cpmfile.json b/cpmfile.json index 1bb29afae4..8542fcddc7 100644 --- a/cpmfile.json +++ b/cpmfile.json @@ -17,7 +17,8 @@ "version": "1.57", "find_args": "CONFIG OPTIONAL_COMPONENTS headers context system fiber filesystem", "patches": [ - "0001-clang-cl.patch" + "0001-clang-cl.patch", + "0002-ios-fix.patch" ] }, "fmt": { diff --git a/docs/CPMUtil/AddCIPackage.md b/docs/CPMUtil/AddCIPackage.md index bc7c1ccfad..7319b514ff 100644 --- a/docs/CPMUtil/AddCIPackage.md +++ b/docs/CPMUtil/AddCIPackage.md @@ -18,3 +18,4 @@ - `linux-amd64` - `linux-aarch64` - `macos-universal` + - `ios-aarch64` diff --git a/docs/CPMUtil/AddJsonPackage.md 
b/docs/CPMUtil/AddJsonPackage.md index 464cd1731b..e655d794f5 100644 --- a/docs/CPMUtil/AddJsonPackage.md +++ b/docs/CPMUtil/AddJsonPackage.md @@ -61,7 +61,8 @@ In order: OpenSSL CI, Boost (tag + artifact), Opus (options + find_args), discor "version": "3.6.0", "min_version": "1.1.1", "disabled_platforms": [ - "macos-universal" + "macos-universal", + "ios-aarch64" ] }, "boost": { diff --git a/docs/Caveats.md b/docs/Caveats.md index d554f3ff77..ad3fde1876 100644 --- a/docs/Caveats.md +++ b/docs/Caveats.md @@ -4,6 +4,7 @@ - [Arch Linux](#arch-linux) - [Gentoo Linux](#gentoo-linux) - [macOS](#macos) +- [iOS](#ios) - [Solaris](#solaris) - [HaikuOS](#haikuos) - [OpenBSD](#openbsd) @@ -31,6 +32,16 @@ If you're having issues with building, always consult that ebuild. macOS is largely untested. Expect crashes, significant Vulkan issues, and other fun stuff. +## iOS + +iOS has a dedicated build script, we **highly** recommend using that instead of doing anything else, we don't support any other configuration than the one present in said build script. + +To build, it's simply as easy as doing +```sh +chmod +x .ci/ios/build.sh +.ci/ios/build.sh +``` + ## Solaris Always consult [the OpenIndiana package list](https://pkg.openindiana.org/hipster/en/index.shtml) to cross-verify availability. diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 00bdf10a4f..2ebc09113b 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -228,6 +228,10 @@ if (VulkanMemoryAllocator_ADDED) endif() # httplib +if (IOS) + set(HTTPLIB_USE_BROTLI_IF_AVAILABLE OFF) +endif() + AddJsonPackage(httplib) # cpp-jwt diff --git a/externals/cmake-modules/DetectArchitecture.cmake b/externals/cmake-modules/DetectArchitecture.cmake index 105963c8c2..a7e60eba30 100644 --- a/externals/cmake-modules/DetectArchitecture.cmake +++ b/externals/cmake-modules/DetectArchitecture.cmake @@ -35,16 +35,21 @@ This file is based off of Yuzu and Dynarmic. 
# Do note that situations where multiple architectures are defined # should NOT be too dependent on the architecture # otherwise, you may end up with duplicate code -if (CMAKE_OSX_ARCHITECTURES) +if (DEFINED CMAKE_OSX_ARCHITECTURES) set(MULTIARCH_BUILD 1) set(ARCHITECTURE "${CMAKE_OSX_ARCHITECTURES}") - - # hope and pray the architecture names match - foreach(ARCH IN ${CMAKE_OSX_ARCHITECTURES}) - set(ARCHITECTURE_${ARCH} 1 PARENT_SCOPE) - add_definitions(-DARCHITECTURE_${ARCH}=1) - endforeach() - + if (IOS) + # TODO: Right... the toolchain file won't properly accomodate OSX_ARCHITECTURE + # they aren't defining it as a list properly I assume? + set(ARCHITECTURE_arm64 1) + add_definitions(-DARCHITECTURE_arm64=1) + else () + # hope and pray the architecture names match + foreach(ARCH ${CMAKE_OSX_ARCHITECTURES}) + set(ARCHITECTURE_${ARCH} 1) + add_definitions(-DARCHITECTURE_${ARCH}=1) + endforeach() + endif() return() endif() @@ -218,4 +223,4 @@ if (NOT DEFINED ARCHITECTURE) add_definitions(-DARCHITECTURE_GENERIC=1) endif() -message(STATUS "[DetectArchitecture] Target architecture: ${ARCHITECTURE}") \ No newline at end of file +message(STATUS "[DetectArchitecture] Target architecture: ${ARCHITECTURE}") diff --git a/externals/cmake-modules/DetectPlatform.cmake b/externals/cmake-modules/DetectPlatform.cmake index 6475884f1f..eec94d839c 100644 --- a/externals/cmake-modules/DetectPlatform.cmake +++ b/externals/cmake-modules/DetectPlatform.cmake @@ -51,6 +51,12 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") set(CXX_APPLE ON) endif() +# This fixes some quirks with xcrun or weird iOS toolchain cmake files +if (IOS) + unset(CXX_CLANG) + set(CXX_APPLE ON) +endif() + # https://gitlab.kitware.com/cmake/cmake/-/merge_requests/11112 # This works totally fine on MinGW64, but not CLANG{,ARM}64 if(MINGW AND CXX_CLANG) diff --git a/externals/cpmfile.json b/externals/cpmfile.json index f849426a4d..45400f0801 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ 
-23,7 +23,7 @@ "package": "sirit", "name": "sirit", "repo": "eden-emulator/sirit", - "version": "1.0.4" + "version": "1.0.5" }, "httplib": { "repo": "yhirose/cpp-httplib", @@ -36,7 +36,8 @@ "0002-fix-zstd.patch" ], "options": [ - "HTTPLIB_REQUIRE_OPENSSL ON" + "HTTPLIB_REQUIRE_OPENSSL ON", + "HTTPLIB_DISABLE_MACOSX_AUTOMATIC_ROOT_CERTIFICATES ON" ] }, "cpp-jwt": { @@ -111,7 +112,8 @@ ], "patches": [ "0001-netbsd-fix.patch", - "0002-allow-static-only.patch" + "0002-allow-static-only.patch", + "0003-ios-fix.patch" ] }, "spirv-headers": { diff --git a/externals/ffmpeg/CMakeLists.txt b/externals/ffmpeg/CMakeLists.txt index 3140f8e545..e635ab61a9 100644 --- a/externals/ffmpeg/CMakeLists.txt +++ b/externals/ffmpeg/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project # SPDX-License-Identifier: GPL-3.0-or-later # SPDX-FileCopyrightText: 2021 yuzu Emulator Project @@ -11,9 +11,9 @@ set(FFmpeg_HWACCEL_FLAGS) set(FFmpeg_HWACCEL_INCLUDE_DIRS) set(FFmpeg_HWACCEL_LDFLAGS) -if (UNIX AND NOT ANDROID) +if (UNIX AND NOT ANDROID AND NOT IOS) find_package(PkgConfig REQUIRED) - if (NOT ANDROID) + if (NOT ANDROID AND NOT IOS) pkg_check_modules(LIBVA libva) pkg_check_modules(CUDA cuda) pkg_check_modules(FFNVCODEC ffnvcodec) @@ -182,6 +182,10 @@ else() find_program(BASH_PROGRAM bash REQUIRED) set(FFmpeg_CROSS_COMPILE_FLAGS "") + # `configure` parameters builds only exactly what yuzu needs from FFmpeg + # `--disable-vdpau` is needed to avoid linking issues + set(FFmpeg_CC ${CMAKE_C_COMPILER_LAUNCHER} ${CMAKE_C_COMPILER}) + set(FFmpeg_CXX ${CMAKE_CXX_COMPILER_LAUNCHER} ${CMAKE_CXX_COMPILER}) if (ANDROID) string(TOLOWER "${CMAKE_HOST_SYSTEM_NAME}" FFmpeg_HOST_SYSTEM_NAME) set(TOOLCHAIN "${ANDROID_NDK}/toolchains/llvm/prebuilt/${FFmpeg_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}") @@ -197,12 +201,23 @@ else() --extra-ldflags="--ld-path=${TOOLCHAIN}/bin/ld.lld" 
--extra-ldflags="-nostdlib" ) + elseif(IOS) + execute_process(COMMAND xcrun --sdk iphoneos --show-sdk-path OUTPUT_VARIABLE SYSROOT) + # Lovely extra newline apple adds that **we** must remove... thank you apple! + string(STRIP "${SYSROOT}" SYSROOT) + set(FFmpeg_CC xcrun --sdk iphoneos clang -arch arm64) + set(FFmpeg_CXX xcrun --sdk iphoneos clang++ -arch arm64) + list(APPEND FFmpeg_CROSS_COMPILE_FLAGS + --arch=arm64 + --enable-cross-compile + --sysroot="${SYSROOT}" + --extra-ldflags="-miphoneos-version-min=16.0" + --install-name-dir='@rpath' + --disable-audiotoolbox + --disable-videotoolbox + ) endif() - # `configure` parameters builds only exactly what yuzu needs from FFmpeg - # `--disable-vdpau` is needed to avoid linking issues - set(FFmpeg_CC ${CMAKE_C_COMPILER_LAUNCHER} ${CMAKE_C_COMPILER}) - set(FFmpeg_CXX ${CMAKE_CXX_COMPILER_LAUNCHER} ${CMAKE_CXX_COMPILER}) add_custom_command( OUTPUT ${FFmpeg_MAKEFILE} diff --git a/externals/libusb/CMakeLists.txt b/externals/libusb/CMakeLists.txt index 47b54f43cc..361e43a832 100644 --- a/externals/libusb/CMakeLists.txt +++ b/externals/libusb/CMakeLists.txt @@ -24,7 +24,8 @@ if (MINGW OR PLATFORM_LINUX OR APPLE) message(FATAL_ERROR "Required program `autoconf` not found.") endif() - find_program(LIBTOOLIZE libtoolize) + find_program(LIBTOOLIZE + NAMES libtoolize glibtoolize) if ("${LIBTOOLIZE}" STREQUAL "LIBTOOLIZE-NOTFOUND") message(FATAL_ERROR "Required program `libtoolize` not found.") endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 600b985609..ad8adc7a04 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -127,13 +127,15 @@ else() add_compile_options( $<$:-Werror=all> $<$:-Werror=extra> - $<$:-Werror=missing-declarations> $<$:-Werror=shadow> $<$:-Werror=unused> $<$:-Wno-attributes> $<$:-Wno-invalid-offsetof> $<$:-Wno-unused-parameter> $<$:-Wno-missing-field-initializers>) + if (NOT IOS) + add_compile_options($<$:-Werror=missing-declarations>) + endif() if (CXX_CLANG OR CXX_ICC OR CXX_APPLE) # 
Clang, AppleClang, or Intel C++ if (NOT MSVC) @@ -249,4 +251,9 @@ if (ANDROID) target_include_directories(yuzu-android PRIVATE android/app/src/main) endif() +if (IOS) + add_subdirectory(ios) + add_compile_options($<$:-Wno-error>) +endif() + include(GenerateDepHashes) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 1ee4794272..00a25a5744 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -144,7 +144,8 @@ add_library( zstd_compression.cpp zstd_compression.h fs/ryujinx_compat.h fs/ryujinx_compat.cpp - fs/symlink.h fs/symlink.cpp) + fs/symlink.h fs/symlink.cpp + httplib.h) if(WIN32) target_sources(common PRIVATE windows/timer_resolution.cpp @@ -242,7 +243,7 @@ else() target_link_libraries(common PUBLIC Boost::headers) endif() -target_link_libraries(common PUBLIC Boost::filesystem Boost::context) +target_link_libraries(common PUBLIC Boost::filesystem Boost::context httplib::httplib) if (lz4_ADDED) target_include_directories(common PRIVATE ${lz4_SOURCE_DIR}/lib) diff --git a/src/common/device_power_state.cpp b/src/common/device_power_state.cpp index 2dfa7dc305..01bcedeba2 100644 --- a/src/common/device_power_state.cpp +++ b/src/common/device_power_state.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include "device_power_state.h" @@ -14,11 +14,14 @@ extern std::atomic g_has_battery; #elif defined(__APPLE__) #include -#if TARGET_OS_MAC +#if defined(TARGET_OS_MAC) && TARGET_OS_MAC +#if TARGET_OS_IPHONE +// ios doesnt have this +#else #include #include #endif - +#endif #elif defined(__linux__) #include #include @@ -48,7 +51,9 @@ namespace Common { info.percentage = g_battery_percentage.load(std::memory_order_relaxed); info.charging = g_is_charging.load(std::memory_order_relaxed); info.has_battery = g_has_battery.load(std::memory_order_relaxed); - +#elif defined(__APPLE__) && 
TARGET_OS_IPHONE + // Not implemented + info.has_battery = false; #elif defined(__APPLE__) && TARGET_OS_MAC CFTypeRef info_ref = IOPSCopyPowerSourcesInfo(); CFArrayRef sources = IOPSCopyPowerSourcesList(info_ref); @@ -96,7 +101,6 @@ namespace Common { #else info.has_battery = false; #endif - return info; } } diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 04f3a65778..11f03867d5 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -27,7 +27,11 @@ #include #elif defined(__APPLE__) #include +#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE +// Not available on iOS for some fucking stupid reason... +#else #include +#endif #include #include #elif defined(__FreeBSD__) diff --git a/src/common/httplib.h b/src/common/httplib.h new file mode 100644 index 0000000000..2f971bd5f3 --- /dev/null +++ b/src/common/httplib.h @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#define CPPHTTPLIB_DISABLE_MACOSX_AUTOMATIC_ROOT_CERTIFICATES +#define CPPHTTPLIB_OPENSSL_SUPPORT + +#include diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 7bcbe737b6..a209ac3600 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: 2013 Dolphin Emulator Project @@ -116,18 +116,119 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st } std::string UTF16ToUTF8(std::u16string_view input) { - std::wstring_convert, char16_t> convert; - return convert.to_bytes(input.data(), input.data() + input.size()); + std::string result; + result.reserve(input.size()); + for (size_t i = 0; i < input.size(); ++i) { + uint32_t codepoint = input[i]; + // Handle surrogate pairs + if (codepoint >= 
0xD800 && codepoint <= 0xDBFF) { + if (i + 1 < input.size()) { + uint32_t low = input[i + 1]; + if (low >= 0xDC00 && low <= 0xDFFF) { + codepoint = ((codepoint - 0xD800) << 10) + (low - 0xDC00) + 0x10000; + ++i; + } + } + } + if (codepoint <= 0x7F) { + result.push_back(static_cast(codepoint)); + } else if (codepoint <= 0x7FF) { + result.push_back(static_cast(0xC0 | (codepoint >> 6))); + result.push_back(static_cast(0x80 | (codepoint & 0x3F))); + } else if (codepoint <= 0xFFFF) { + result.push_back(static_cast(0xE0 | (codepoint >> 12))); + result.push_back(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + result.push_back(static_cast(0x80 | (codepoint & 0x3F))); + } else { + result.push_back(static_cast(0xF0 | (codepoint >> 18))); + result.push_back(static_cast(0x80 | ((codepoint >> 12) & 0x3F))); + result.push_back(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + result.push_back(static_cast(0x80 | (codepoint & 0x3F))); + } + } + return result; } std::u16string UTF8ToUTF16(std::string_view input) { - std::wstring_convert, char16_t> convert; - return convert.from_bytes(input.data(), input.data() + input.size()); + std::u16string result; + size_t i = 0; + while (i < input.size()) { + uint32_t codepoint = 0; + unsigned char c = input[i]; + size_t extra = 0; + if ((c & 0x80) == 0) { + codepoint = c; + extra = 0; + } else if ((c & 0xE0) == 0xC0) { + codepoint = c & 0x1F; + extra = 1; + } else if ((c & 0xF0) == 0xE0) { + codepoint = c & 0x0F; + extra = 2; + } else if ((c & 0xF8) == 0xF0) { + codepoint = c & 0x07; + extra = 3; + } else { + // Invalid UTF-8 + ++i; + continue; + } + if (i + extra >= input.size()) break; + for (size_t j = 1; j <= extra; ++j) { + if ((input[i + j] & 0xC0) != 0x80) { + codepoint = 0xFFFD; + break; + } + codepoint = (codepoint << 6) | (input[i + j] & 0x3F); + } + if (codepoint <= 0xFFFF) { + result.push_back(static_cast(codepoint)); + } else { + codepoint -= 0x10000; + result.push_back(static_cast(0xD800 + (codepoint >> 10))); + 
result.push_back(static_cast(0xDC00 + (codepoint & 0x3FF))); + } + i += extra + 1; + } + return result; } std::u32string UTF8ToUTF32(std::string_view input) { - std::wstring_convert, char32_t> convert; - return convert.from_bytes(input.data(), input.data() + input.size()); + std::u32string result; + size_t i = 0; + while (i < input.size()) { + uint32_t codepoint = 0; + unsigned char c = input[i]; + size_t extra = 0; + if ((c & 0x80) == 0) { + codepoint = c; + extra = 0; + } else if ((c & 0xE0) == 0xC0) { + codepoint = c & 0x1F; + extra = 1; + } else if ((c & 0xF0) == 0xE0) { + codepoint = c & 0x0F; + extra = 2; + } else if ((c & 0xF8) == 0xF0) { + codepoint = c & 0x07; + extra = 3; + } else { + // Invalid UTF-8 + ++i; + continue; + } + if (i + extra >= input.size()) break; + for (size_t j = 1; j <= extra; ++j) { + if ((input[i + j] & 0xC0) != 0x80) { + codepoint = 0xFFFD; + break; + } + codepoint = (codepoint << 6) | (input[i + j] & 0x3F); + } + result.push_back(codepoint); + i += extra + 1; + } + return result; } #ifdef _WIN32 diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 08a2d0e2db..aad42b883b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1264,12 +1264,15 @@ if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) hle/service/jit/jit.cpp hle/service/jit/jit.h) target_link_libraries(core PRIVATE dynarmic::dynarmic) + # Quick hack for XCode generator... 
+ if (IOS) + target_include_directories(core PRIVATE "${CMAKE_SOURCE_DIR}/dynarmic/src") + endif() endif() target_sources(core PRIVATE hle/service/ssl/ssl_backend_openssl.cpp) target_link_libraries(core PRIVATE OpenSSL::SSL OpenSSL::Crypto) -target_compile_definitions(core PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT) # TODO diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 46384f7e6d..abae046d3c 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -1,10 +1,10 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include +#include "dynarmic/src/dynarmic/interface/halt_reason.h" #include "core/arm/arm_interface.h" diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index d93cc1cfc9..9e55d8c27a 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -6,8 +6,8 @@ #pragma once -#include -#include +#include "dynarmic/src/dynarmic/interface/A32/a32.h" +#include "dynarmic/src/dynarmic/interface/code_page.h" #include "core/arm/arm_interface.h" #include "core/arm/dynarmic/dynarmic_exclusive_monitor.h" diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 9eff17c5fe..14b28a237b 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -10,12 +10,12 @@ #include #include -#include -#include -#include "common/common_types.h" -#include "common/hash.h" -#include "core/arm/arm_interface.h" -#include "core/arm/dynarmic/dynarmic_exclusive_monitor.h" +#include "../../../dynarmic/src/dynarmic/interface/A64/a64.h" +#include "../../../dynarmic/src/dynarmic/interface/code_page.h" +#include 
"../../../common/common_types.h" +#include "../../../common/hash.h" +#include "../arm_interface.h" +#include "dynarmic_exclusive_monitor.h" namespace Core::Memory { class Memory; diff --git a/src/core/arm/dynarmic/dynarmic_cp15.h b/src/core/arm/dynarmic/dynarmic_cp15.h index f3d96b0d83..5cec5d0060 100644 --- a/src/core/arm/dynarmic/dynarmic_cp15.h +++ b/src/core/arm/dynarmic/dynarmic_cp15.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2017 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -5,7 +8,7 @@ #include -#include +#include "dynarmic/interface/A32/coprocessor.h" #include "common/common_types.h" namespace Core { diff --git a/src/core/arm/dynarmic/dynarmic_exclusive_monitor.h b/src/core/arm/dynarmic/dynarmic_exclusive_monitor.h index c4f22ec891..5beb40c325 100644 --- a/src/core/arm/dynarmic/dynarmic_exclusive_monitor.h +++ b/src/core/arm/dynarmic/dynarmic_exclusive_monitor.h @@ -1,9 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once -#include +#include "dynarmic/src/dynarmic/interface/exclusive_monitor.h" #include "common/common_types.h" #include "core/arm/exclusive_monitor.h" diff --git a/src/core/arm/nce/visitor_base.h b/src/core/arm/nce/visitor_base.h index 6a2be3d9bc..65741bedac 100644 --- a/src/core/arm/nce/visitor_base.h +++ b/src/core/arm/nce/visitor_base.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2023 merryhime // SPDX-License-Identifier: GPL-2.0-or-later @@ -7,9 +10,9 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" 
-#include -#include -#include +#include "dynarmic/frontend/A64/a64_types.h" +#include "dynarmic/frontend/A64/decoder/a64.h" +#include "dynarmic/frontend/imm.h" #pragma GCC diagnostic pop diff --git a/src/core/hle/service/bcat/news/builtin_news.cpp b/src/core/hle/service/bcat/news/builtin_news.cpp index ed001b056b..ad7aafa4e3 100644 --- a/src/core/hle/service/bcat/news/builtin_news.cpp +++ b/src/core/hle/service/bcat/news/builtin_news.cpp @@ -15,9 +15,7 @@ #include #include -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -#include -#endif +#include "common/httplib.h" #include #include @@ -37,7 +35,7 @@ namespace Service::News { namespace { -constexpr const char* GitHubAPI_EdenReleases = "/repos/eden-emulator/Releases/releases"; +[[maybe_unused]] constexpr const char* GitHubAPI_EdenReleases = "/repos/eden-emulator/Releases/releases"; // Cached logo data std::vector default_logo_small; @@ -104,7 +102,6 @@ std::vector TryLoadFromDisk(const std::filesystem::path& path) { std::vector DownloadImage(const std::string& url_path, const std::filesystem::path& cache_path) { LOG_INFO(Service_BCAT, "Downloading image: https://eden-emu.dev{}", url_path); -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT try { httplib::Client cli("https://eden-emu.dev"); cli.set_follow_location(true); @@ -128,7 +125,6 @@ std::vector DownloadImage(const std::string& url_path, const std::filesystem } catch (...) { LOG_WARNING(Service_BCAT, "Failed to download: {}", url_path); } -#endif return {}; } @@ -233,7 +229,6 @@ void WriteCachedJson(std::string_view json) { std::optional DownloadReleasesJson() { -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT try { httplib::SSLClient cli{"api.github.com", 443}; cli.set_connection_timeout(10); @@ -255,7 +250,7 @@ std::optional DownloadReleasesJson() { } catch (...) 
{ LOG_WARNING(Service_BCAT, " failed to download releases"); } -#endif + return std::nullopt; } diff --git a/src/core/hle/service/jit/jit_context.cpp b/src/core/hle/service/jit/jit_context.cpp index 522d849e6f..cd0252c6ec 100644 --- a/src/core/hle/service/jit/jit_context.cpp +++ b/src/core/hle/service/jit/jit_context.cpp @@ -8,9 +8,9 @@ #include #include #include -#include -#include -#include +#include "dynarmic/interface/A64/a64.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/code_page.h" #include "common/alignment.h" #include "common/common_funcs.h" diff --git a/src/dynarmic/CMakeModules/TargetArchitectureSpecificSources.cmake b/src/dynarmic/CMakeModules/TargetArchitectureSpecificSources.cmake deleted file mode 100644 index 7af6cc518b..0000000000 --- a/src/dynarmic/CMakeModules/TargetArchitectureSpecificSources.cmake +++ /dev/null @@ -1,29 +0,0 @@ -# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -# SPDX-License-Identifier: GPL-3.0-or-later - -function(target_architecture_specific_sources project arch) - if (NOT MULTIARCH_BUILD) - target_sources("${project}" PRIVATE ${ARGN}) - return() - endif() - - foreach(input_file IN LISTS ARGN) - if(input_file MATCHES ".cpp$") - if(NOT IS_ABSOLUTE ${input_file}) - set(input_file "${CMAKE_CURRENT_SOURCE_DIR}/${input_file}") - endif() - - set(output_file "${CMAKE_CURRENT_BINARY_DIR}/arch_gen/${input_file}") - add_custom_command( - OUTPUT "${output_file}" - COMMAND ${CMAKE_COMMAND} "-Darch=${arch}" - "-Dinput_file=${input_file}" - "-Doutput_file=${output_file}" - -P "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/impl/TargetArchitectureSpecificSourcesWrapFile.cmake" - DEPENDS "${input_file}" - VERBATIM - ) - target_sources(${project} PRIVATE "${output_file}") - endif() - endforeach() -endfunction() diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index f79d18c15a..5c008f68c3 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ 
b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -1,6 +1,5 @@ # SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project # SPDX-License-Identifier: GPL-3.0-or-later -include(TargetArchitectureSpecificSources) add_library(dynarmic STATIC mcl/bit.hpp @@ -146,7 +145,7 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1) target_link_libraries(dynarmic PRIVATE xbyak::xbyak) - target_architecture_specific_sources(dynarmic "x86_64" + target_sources(dynarmic PRIVATE backend/x64/abi.cpp backend/x64/abi.h backend/x64/block_of_code.cpp @@ -207,7 +206,7 @@ endif() if ("arm64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic PRIVATE merry::oaknut) - target_architecture_specific_sources(dynarmic "arm64" + target_sources(dynarmic PRIVATE backend/arm64/a32_jitstate.cpp backend/arm64/a32_jitstate.h backend/arm64/a64_jitstate.h diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index a0fd944041..6f53580997 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -38,33 +38,21 @@ template static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); (code.*fn)(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(code, inst, xmm_a); } -template -static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - - 
(code.*fn)(xmm_a, xmm_a, xmm_b); - - ctx.reg_alloc.DefineValue(code, inst, xmm_a); -} - template static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -86,8 +74,8 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -111,9 +99,9 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 3 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -139,9 +127,9 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); const u8 arg2 = args[1].GetImmediateU8(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -166,9 +154,9 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 3 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -194,7 +182,7 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::SSE41)) { @@ -218,7 +206,7 @@ void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pextrw(dest, source, index); ctx.reg_alloc.DefineValue(code, inst, dest); @@ -234,10 +222,10 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 dest = 
ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.pextrd(dest, source, index); } else { - const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshufd(source, source, index); code.movd(dest, source); } @@ -253,7 +241,7 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { if (index == 0) { // TODO: DefineValue directly on Argument for index == 0 const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr(code).cvt64(); - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.movq(dest, source); ctx.reg_alloc.DefineValue(code, inst, dest); return; @@ -262,10 +250,10 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr(code).cvt64(); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.pextrq(dest, source, 1); } else { - const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.punpckhqdq(source, source); code.movq(dest, source); } @@ -277,7 +265,7 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg8 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt8(); @@ -310,7 +298,7 @@ void 
EmitX64::EmitVectorSetElement16(EmitContext& ctx, IR::Inst* inst) { ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg16 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt16(); code.pinsrw(source_vector, source_elem.cvt32(), index); @@ -322,7 +310,7 @@ void EmitX64::EmitVectorSetElement32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg32 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt32(); @@ -345,7 +333,7 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(code, args[2]); @@ -355,7 +343,7 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, source_vector); } else { const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(code, args[2]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(tmp, source_elem); @@ -369,72 +357,53 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { } } -static void VectorAbs8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if 
(code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsb(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pxor(temp, temp); - code.psubb(temp, data); - code.pminub(data, temp); - } -} - -static void VectorAbs16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsw(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pxor(temp, temp); - code.psubw(temp, data); - code.pmaxsw(data, temp); - } -} - -static void VectorAbs32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsd(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.movdqa(temp, data); - code.psrad(temp, 31); - code.pxor(data, temp); - code.psubd(data, temp); - } -} - -static void VectorAbs64(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - code.vpabsq(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pshufd(temp, data, 0b11110101); - code.psrad(temp, 31); - code.pxor(data, temp); - code.psubq(data, temp); - } -} - static void EmitVectorAbs(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); switch (esize) { case 8: - VectorAbs8(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsb(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubb(temp, data); + code.pminub(data, temp); + } break; case 16: - VectorAbs16(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsw(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + 
code.pxor(temp, temp); + code.psubw(temp, data); + code.pmaxsw(data, temp); + } break; case 32: - VectorAbs32(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsd(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(temp, data); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubd(data, temp); + } break; case 64: - VectorAbs64(code, ctx, data); + if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { + code.vpabsq(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(temp, data, 0b11110101); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubq(data, temp); + } break; } - ctx.reg_alloc.DefineValue(code, inst, data); } @@ -477,15 +446,15 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorAndNot(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.pandn(xmm_b, xmm_a); ctx.reg_alloc.DefineValue(code, inst, xmm_b); } -static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) { +static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, auto const& result, u8 shift_amount) { if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = shift_amount < 8 ? 
(0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8)) @@ -494,7 +463,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const return; } - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpckhbw(tmp, result); code.punpcklbw(result, result); @@ -506,7 +475,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); ArithmeticShiftRightByte(ctx, code, result, shift_amount); @@ -517,7 +486,7 @@ void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psraw(result, shift_amount); @@ -528,7 +497,7 @@ void EmitX64::EmitVectorArithmeticShiftRight16(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrad(result, shift_amount); @@ -538,14 +507,14 @@ void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - 
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = (std::min)(args[1].GetImmediateU8(), u8(63)); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { code.vpsraq(result, result, shift_amount); } else { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); const u64 sign_bit = 0x80000000'00000000u >> shift_amount; @@ -660,12 +629,12 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); code.vmovq(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); code.movq(a, a); @@ -678,7 +647,7 @@ void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(a, a, 0); @@ -687,7 +656,7 @@ void EmitX64::EmitVectorBroadcastLower16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(a, a, 
0b01000100); @@ -696,11 +665,11 @@ void EmitX64::EmitVectorBroadcastLower32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); } else { @@ -713,7 +682,7 @@ void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastw(a, a); } else { @@ -725,7 +694,7 @@ void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastd(a, a); } else { @@ -736,7 +705,7 @@ void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastq(a, a); } else { @@ -747,7 +716,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { 
void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 16); @@ -758,7 +727,7 @@ void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) code.vpbroadcastb(a, a); code.vmovq(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); code.movq(a, a); @@ -771,7 +740,7 @@ void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorBroadcastElementLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 8); @@ -784,7 +753,7 @@ void EmitX64::EmitVectorBroadcastElementLower16(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorBroadcastElementLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 4); @@ -800,7 +769,7 @@ void EmitX64::EmitVectorBroadcastElementLower32(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); 
ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 16); @@ -810,7 +779,7 @@ void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); @@ -824,7 +793,7 @@ void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 8); @@ -844,7 +813,7 @@ void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 4); @@ -856,7 +825,7 @@ void EmitX64::EmitVectorBroadcastElement32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 2); @@ -1043,9 +1012,9 @@ void EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { auto args 
= ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); @@ -1057,11 +1026,11 @@ void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); + auto const zero = ctx.reg_alloc.ScratchXmm(code); code.pxor(zero, zero); code.pblendw(lhs, zero, 0b10101010); @@ -1082,8 +1051,8 @@ void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufps(lhs, rhs, 0b10001000); @@ -1092,8 +1061,8 @@ void EmitX64::EmitVectorDeinterleaveEven32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufpd(lhs, rhs, 0b00); @@ -1102,16 +1071,16 @@ void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklbw(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0D'09'05'01'0C'08'04'00, 0x8080808080808080)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); @@ -1126,15 +1095,15 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklwd(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0B0A'0302'0908'0100, 0x8080'8080'8080'8080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = 
ctx.reg_alloc.UseScratchXmm(code, args[1]); code.pslld(lhs, 16); code.psrad(lhs, 16); @@ -1152,8 +1121,8 @@ void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { // copy bytes 0:3 of rhs to lhs, zero out upper 8 bytes @@ -1168,8 +1137,8 @@ void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorDeinterleaveOdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psraw(lhs, 8); code.psraw(rhs, 8); @@ -1180,8 +1149,8 @@ void EmitX64::EmitVectorDeinterleaveOdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrad(lhs, 16); code.psrad(rhs, 16); @@ -1192,8 +1161,8 @@ void EmitX64::EmitVectorDeinterleaveOdd16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufps(lhs, rhs, 0b11011101); @@ -1202,8 +1171,8 @@ void EmitX64::EmitVectorDeinterleaveOdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufpd(lhs, rhs, 0b11); @@ -1212,15 +1181,15 @@ void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklbw(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0F'0B'07'03'0E'0A'06'02, 0x8080808080808080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psraw(lhs, 8); code.psraw(rhs, 8); @@ -1234,15 +1203,15 @@ void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) 
{ - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklwd(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0F0E'0706'0D0C'0504, 0x8080'8080'8080'8080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrad(lhs, 16); code.psrad(rhs, 16); @@ -1258,17 +1227,17 @@ void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst) auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); // copy bytes 4:7 of lhs to bytes 0:3 of rhs, zero out upper 8 bytes code.insertps(rhs, lhs, 0b01001100); ctx.reg_alloc.DefineValue(code, inst, rhs); } else { - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const zero = ctx.reg_alloc.ScratchXmm(code); code.xorps(zero, zero); code.unpcklps(lhs, rhs); @@ -1302,9 +1271,9 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 
0b10110001); @@ -1317,9 +1286,9 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqq(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b01001110); @@ -1327,9 +1296,9 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b10110001); @@ -1353,16 +1322,16 @@ void EmitX64::EmitVectorExtract(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.palignr(xmm_b, xmm_a, position / 8); ctx.reg_alloc.DefineValue(code, inst, xmm_b); return; } - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrldq(xmm_a, 
position / 8); code.pslldq(xmm_b, (128 - position) / 8); @@ -1374,13 +1343,13 @@ void EmitX64::EmitVectorExtract(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorExtractLower(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 position = args[2].GetImmediateU8(); ASSERT(position % 8 == 0); if (position != 0) { - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklqdq(xmm_a, xmm_b); code.psrldq(xmm_a, position / 8); @@ -1405,22 +1374,33 @@ void EmitX64::EmitVectorGreaterS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorGreaterS64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE42)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpgtq); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x80000000, 0x80000000)); + code.pxor(tmp0, tmp2); + code.pxor(tmp1, tmp2); + code.movdqa(tmp2, tmp0); + code.pcmpeqd(tmp0, tmp1); + code.pcmpgtd(tmp2, tmp1); + code.pshufd(tmp1, tmp0, 245); + code.pshufd(tmp3, tmp2, 160); + code.pshufd(tmp0, tmp2, 245); + code.pand(tmp1, tmp3); + code.por(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - for (size_t i = 0; i < result.size(); ++i) { - result[i] = (a[i] > b[i]) ? 
~u64(0) : 0; - } - }); } static void EmitVectorHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, b); code.pand(tmp, a); @@ -1459,9 +1439,9 @@ void EmitX64::EmitVectorHalvingAddS32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, b); @@ -1504,12 +1484,12 @@ void EmitX64::EmitVectorHalvingAddU32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x8080808080808080, 0x8080808080808080)); code.pxor(a, tmp); code.pxor(b, tmp); @@ -1518,7 
+1498,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* break; } case 16: { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x8000800080008000, 0x8000800080008000)); code.pxor(a, tmp); code.pxor(b, tmp); @@ -1552,8 +1532,8 @@ void EmitX64::EmitVectorHalvingSubS32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingSubUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: @@ -1590,8 +1570,8 @@ void EmitX64::EmitVectorHalvingSubU32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); switch (size) { case 8: @@ -1630,8 +1610,8 @@ void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorInterleaveUpper(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); switch (size) { case 8: @@ -1670,7 +1650,7 @@ void EmitX64::EmitVectorInterleaveUpper64(EmitContext& ctx, IR::Inst* 
inst) { void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); if (shift_amount == 0) { @@ -1696,7 +1676,7 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psllw(result, shift_amount); @@ -1707,7 +1687,7 @@ void EmitX64::EmitVectorLogicalShiftLeft16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.pslld(result, shift_amount); @@ -1718,7 +1698,7 @@ void EmitX64::EmitVectorLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psllq(result, shift_amount); @@ -1729,7 +1709,7 @@ void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, 
args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); if (shift_amount == 0) { @@ -1753,7 +1733,7 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrlw(result, shift_amount); @@ -1764,7 +1744,7 @@ void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrld(result, shift_amount); @@ -1775,7 +1755,7 @@ void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrlq(result, shift_amount); @@ -1783,41 +1763,12 @@ void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, result); } -template -static void EmitVectorLogicalVShiftAVX2(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - static_assert(esize == 32 || esize == 64); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); 
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - - // store sign bit of lowest byte of each element of b to select left/right shift later - ICODE(vpsll)(xmm0, b, u8(esize - 8)); - - // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) - code.vpabsb(b, b); - code.vpand(b, b, code.BConst(xword, 0xFF)); - - // calculate shifts - ICODE(vpsllv)(result, a, b); - ICODE(vpsrlv)(a, a, b); - - // implicit argument: xmm0 (sign of lowest byte of b) - if (esize == 32) { - code.blendvps(result, a); - } else { - code.blendvpd(result, a); - } - ctx.reg_alloc.DefineValue(code, inst, result); -} - void EmitX64::EmitVectorLogicalVShift8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::GFNI)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Opmask negative_mask = k1; code.pxor(tmp, tmp); @@ -1862,10 +1813,10 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const right_shift = 
ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -1886,18 +1837,87 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX2)) { - EmitVectorLogicalVShiftAVX2<32>(code, ctx, inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); + // store sign bit of lowest byte of each element of b to select left/right shift later + code.vpslld(mask, b, u8(32 - 8)); + // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) + code.vpabsb(b, b); + code.vpand(b, b, code.BConst<32>(xword, 0xFF)); + // calculate shifts + code.vpsllvd(result, a, b); + code.vpsrlvd(a, a, b); + code.vblendvps(result, result, a, mask); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp6 = ctx.reg_alloc.ScratchXmm(code); + code.pxor(tmp3, tmp3); + code.movdqa(tmp2, tmp0); + code.psubb(tmp3, tmp1); + code.movdqa(tmp4, tmp2); + code.movdqa(tmp6, tmp2); + code.pminub(tmp3, tmp1); + 
code.pslld(tmp1, 24); + code.pand(tmp3, code.Const(xword, 0x000000ff'000000ff, 0x000000ff'000000ff)); + code.psrad(tmp1, 31); + code.pshuflw(tmp0, tmp3, 254); + code.pshuflw(tmp5, tmp3, 84); + code.psrld(tmp4, tmp0); + code.movdqa(tmp0, tmp2); + code.psrld(tmp0, tmp5); + code.punpcklqdq(tmp0, tmp4); + code.pshufd(tmp4, tmp3, 238); + code.pslld(tmp3, 23); + code.paddd(tmp3, code.Const(xword, 0x3F80'00003F80'0000, 0x3F80'00003F80'0000)); + code.pshuflw(tmp5, tmp4, 254); + code.pshuflw(tmp4, tmp4, 84); + code.psrld(tmp6, tmp5); + code.movdqa(tmp5, tmp2); + code.psrld(tmp5, tmp4); + code.pshufd(tmp4, tmp2, 245); + code.punpckhqdq(tmp5, tmp6); + code.cvttps2dq(tmp3, tmp3); + code.shufps(tmp0, tmp5, 204); + code.pmuludq(tmp2, tmp3); + code.pshufd(tmp3, tmp3, 245); + code.andps(tmp0, tmp1); + code.pmuludq(tmp3, tmp4); + code.pshufd(tmp2, tmp2, 232); + code.pshufd(tmp3, tmp3, 232); + code.punpckldq(tmp2, tmp3); + code.pandn(tmp1, tmp2); + code.orps(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX2)) { - EmitVectorLogicalVShiftAVX2<64>(code, ctx, inst); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); + // store sign bit of lowest byte of each element of b to select left/right shift later + code.vpsllq(mask, b, u8(64 - 8)); + // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) + code.vpabsb(b, b); + code.vpand(b, b, code.BConst<64>(xword, 0xFF)); + // calculate shifts + code.vpsllvq(result, a, b); + code.vpsrlvq(a, a, b); + code.vblendvpd(result, result, a, mask); + ctx.reg_alloc.DefineValue(code, inst, result); } else { EmitTwoArgumentFallback(code, ctx, inst, 
[](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); @@ -1912,28 +1932,11 @@ enum class MinMaxOperation { Max, }; -// Compute the minimum/maximum of two vectors of signed 8-bit integers, using only SSE2 instructons. -// The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxS8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - if(op == MinMaxOperation::Min) { - code.movdqa(c, b); - code.pcmpgtb(c, a); - } else { - code.movdqa(c, a); - code.pcmpgtb(c, b); - } - - code.pand(a, c); - code.pandn(c, b); - code.por(a, c); -} - // Compute the minimum/maximum of two vectors of unsigned 16-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { +void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { if(op == MinMaxOperation::Min) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psubusw(c, b); code.psubw(a, c); @@ -1945,8 +1948,8 @@ void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of signed 32-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. 
-void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); +void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { + auto const c = ctx.reg_alloc.ScratchXmm(code); if(op == MinMaxOperation::Min) { code.movdqa(c, b); code.pcmpgtd(c, a); @@ -1962,12 +1965,12 @@ void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of unsigned 32-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); +void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, code.BConst<32>(xword, 0x80000000)); // bias a and b by XORing their sign bits, then use the signed comparison function - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); if(op == MinMaxOperation::Min) { code.movdqa(d, a); code.pxor(d, c); @@ -1989,11 +1992,16 @@ void EmitX64::EmitVectorMaxS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS8(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = 
ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, a); + code.pcmpgtb(c, b); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2005,31 +2013,55 @@ void EmitX64::EmitVectorMaxS32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS32(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); + code.pandn(tmp2, tmp1); + code.por(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmaxsq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmaxsq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); code.vpcmpgtq(xmm0, y, x); code.pblendvb(x, y); - 
ctx.reg_alloc.DefineValue(code, inst, x); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'0000, 0x8000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp2, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp3, tmp4, 245); + code.pand(tmp2, tmp5); + code.por(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } @@ -2041,11 +2073,11 @@ void EmitX64::EmitVectorMaxU16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU16(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2053,35 +2085,54 @@ void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, 
&Xbyak::CodeGenerator::pmaxud); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU32(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmaxuq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmaxuq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, code.Const(xword, 0x8000000000000000, 0x8000000000000000)); code.vpsubq(tmp, y, xmm0); code.vpsubq(xmm0, x, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0); code.pblendvb(x, y); - ctx.reg_alloc.DefineValue(code, inst, x); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const 
tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp2, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp3, tmp4, 245); + code.pand(tmp2, tmp5); + code.por(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } @@ -2089,11 +2140,16 @@ void EmitX64::EmitVectorMinS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS8(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2105,31 +2161,51 @@ void EmitX64::EmitVectorMinS32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsd); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = 
ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxS32(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpminsq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpminsq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.vpcmpgtq(xmm0, y, x); code.pblendvb(y, x); - ctx.reg_alloc.DefineValue(code, inst, y); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'0000, 0x8000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + 
code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp3, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp2, tmp4, 245); + code.pand(tmp3, tmp5); + code.por(tmp2, tmp3); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + //code.movdqa(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); } } @@ -2141,11 +2217,11 @@ void EmitX64::EmitVectorMinU16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminuw); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU16(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2153,57 +2229,93 @@ void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminud); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU32(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, 
&Xbyak::CodeGenerator::vpminuq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpminuq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, code.Const(xword, 0x8000000000000000, 0x8000000000000000)); code.vpsubq(tmp, y, xmm0); code.vpsubq(xmm0, x, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0); code.pblendvb(y, x); - ctx.reg_alloc.DefineValue(code, inst, y); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseXmm(code, args[0]); /* only read by the sequence below */ + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); /* overwritten by pand(tmp1, tmp2) below, so it must be a scratch register */ + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp3, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp2, tmp4, 245); + code.pand(tmp3, tmp5); + code.por(tmp2, tmp3); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + //code.movdqa(tmp0, tmp2); +
ctx.reg_alloc.DefineValue(code, inst, tmp2); } } void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(code); - - // TODO: Optimize - code.movdqa(tmp_a, a); - code.movdqa(tmp_b, b); - code.pmullw(a, b); - code.psrlw(tmp_a, 8); - code.psrlw(tmp_b, 8); - code.pmullw(tmp_a, tmp_b); - code.pand(a, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); - code.psllw(tmp_a, 8); - code.por(a, tmp_a); - - ctx.reg_alloc.DefineValue(code, inst, a); + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.vbroadcastss(tmp3, code.Const(dword, 0x00ff'00ff)); + code.vpmullw(tmp2, tmp1, tmp0); + code.vpandn(tmp0, tmp3, tmp0); + code.vpand(tmp2, tmp2, tmp3); + code.vpmaddubsw(tmp0, tmp1, tmp0); + code.vpsllw(tmp0, tmp0, 8); + code.vpor(tmp0, tmp2, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, tmp0); + code.movdqa(tmp3, tmp1); + code.movdqa(tmp4, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.punpckhbw(tmp2, tmp2); + code.punpckhbw(tmp3, tmp3); + code.punpcklbw(tmp0, tmp0); + code.punpcklbw(tmp1, tmp1); + code.pmullw(tmp3, tmp2); + code.pmullw(tmp0, tmp1); + code.pand(tmp3, tmp4); + code.pand(tmp0, tmp4); + 
code.packuswb(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } } void EmitX64::EmitVectorMultiply16(EmitContext& ctx, IR::Inst* inst) { @@ -2214,31 +2326,32 @@ void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmulld); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - code.movdqa(tmp, a); - code.psrlq(a, 32); - code.pmuludq(tmp, b); - code.psrlq(b, 32); - code.pmuludq(a, b); - code.pshufd(tmp, tmp, 0b00001000); - code.pshufd(b, a, 0b00001000); - code.punpckldq(tmp, b); - - ctx.reg_alloc.DefineValue(code, inst, tmp); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp, a); + code.psrlq(a, 32); + code.pmuludq(tmp, b); + code.psrlq(b, 32); + code.pmuludq(a, b); + code.pshufd(tmp, tmp, 0b00001000); + code.pshufd(b, a, 0b00001000); + code.punpckldq(tmp, b); + ctx.reg_alloc.DefineValue(code, inst, tmp); } } void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) { - if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmullq); - } else if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) { + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = 
ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmullq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); + } else if (code.HasHostFeature(HostFeature::SSE41)) { + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); const Xbyak::Reg64 tmp1 = ctx.reg_alloc.ScratchGpr(code); const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code); @@ -2253,29 +2366,28 @@ void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, a); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); - code.movdqa(tmp1, a); - code.movdqa(tmp2, a); - code.movdqa(tmp3, b); + code.movdqa(tmp1, a); + code.movdqa(tmp2, a); + code.movdqa(tmp3, b); - code.psrlq(tmp1, 32); - code.psrlq(tmp3, 32); + code.psrlq(tmp1, 32); + code.psrlq(tmp3, 32); - code.pmuludq(tmp2, b); - code.pmuludq(tmp3, a); - code.pmuludq(b, tmp1); + code.pmuludq(tmp2, b); + code.pmuludq(tmp3, a); + code.pmuludq(b, tmp1); - code.paddq(b, tmp3); - code.psllq(b, 32); - code.paddq(tmp2, b); + code.paddq(b, tmp3); + code.psllq(b, 32); + code.paddq(tmp2, b); - ctx.reg_alloc.DefineValue(code, inst, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); } } @@ -2307,15 +2419,15 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho | 
HostFeature::AVX512BW)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmovwb(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.pand(a, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); @@ -2328,13 +2440,13 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmovdw(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); if (code.HasHostFeature(HostFeature::SSE41)) { code.pblendw(a, zeros, 0b10101010); @@ -2352,15 +2464,15 @@ void EmitX64::EmitVectorNarrow64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = 
ctx.reg_alloc.ScratchXmm(code); code.vpmovqd(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.shufps(a, zeros, 0b00001000); @@ -2373,13 +2485,13 @@ void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const operand = ctx.reg_alloc.UseXmm(code, args[0]); code.vpternlogq(result, operand, operand, u8(~Tern::c)); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqw(xmm_b, xmm_b); code.pxor(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(code, inst, xmm_a); @@ -2393,9 +2505,9 @@ void EmitX64::EmitVectorOr(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); code.movdqa(tmp, xmm_a); @@ -2411,9 +2523,9 @@ void 
EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); if (code.HasHostFeature(HostFeature::SSSE3)) { @@ -2434,9 +2546,9 @@ void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); if (code.HasHostFeature(HostFeature::SSSE3)) { @@ -2456,10 +2568,10 @@ void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); 
code.movdqa(c, a); code.movdqa(d, b); @@ -2478,17 +2590,17 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); code.phaddw(a, b); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.movdqa(d, b); @@ -2508,17 +2620,17 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); code.phaddd(a, b); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = 
ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.movdqa(d, b); @@ -2535,9 +2647,9 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.punpcklqdq(a, b); @@ -2550,8 +2662,8 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllw(a, 8); @@ -2565,8 +2677,8 @@ void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.pslld(a, 16); @@ -2580,18 +2692,18 @@ void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); code.vpaddq(a, a, c); } else { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllq(a, 32); @@ -2613,8 +2725,8 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllw(a, 8); @@ -2628,8 +2740,8 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.pslld(a, 16); @@ -2643,8 +2755,8 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* ins void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm 
c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllq(a, 32); @@ -2658,14 +2770,10 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins template static void PairedOperation(VectorArray& result, const VectorArray& x, const VectorArray& y, Function fn) { const size_t range = x.size() / 2; - - for (size_t i = 0; i < range; i++) { + for (size_t i = 0; i < range; i++) result[i] = fn(x[2 * i], x[2 * i + 1]); - } - - for (size_t i = 0; i < range; i++) { + for (size_t i = 0; i < range; i++) result[range + i] = fn(y[2 * i], y[2 * i + 1]); - } } template @@ -2686,11 +2794,6 @@ static void PairedMax(VectorArray& result, const VectorArray& x, const Vec PairedOperation(result, x, y, [](auto a, auto b) { return (std::max)(a, b); }); } -template -static void PairedMin(VectorArray& result, const VectorArray& x, const VectorArray& y) { - PairedOperation(result, x, y, [](auto a, auto b) { return (std::min)(a, b); }); -} - template static void LowerPairedMax(VectorArray& result, const VectorArray& x, const VectorArray& y) { LowerPairedOperation(result, x, y, [](auto a, auto b) { return (std::max)(a, b); }); @@ -2705,19 +2808,16 @@ template static void EmitVectorPairedMinMax8(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); code.pshufb(x, tmp); code.pshufb(y, tmp); - code.movaps(tmp, x); code.shufps(tmp, y, 
0b01'00'01'00); - code.shufps(x, y, 0b11'10'11'10); - if constexpr (std::is_member_function_pointer_v) { (code.*fn)(x, tmp); } else { @@ -2730,21 +2830,17 @@ static void EmitVectorPairedMinMax8(BlockOfCode& code, EmitContext& ctx, IR::Ins template static void EmitVectorPairedMinMaxLower8(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.punpcklqdq(x, y); code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); code.movhlps(y, x); code.movq(x, x); - if constexpr (std::is_member_function_pointer_v) { (code.*fn)(x, y); } else { fn(x, y); } - ctx.reg_alloc.DefineValue(code, inst, x); } @@ -2752,9 +2848,9 @@ template static void EmitVectorPairedMinMax16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); // swap idxs 1 and 2 within 64-bit lanes so that both registers contain [even, odd, even, odd]-indexed pairs of elements code.pshuflw(x, x, 0b11'01'10'00); @@ -2780,63 +2876,31 @@ static void EmitVectorPairedMinMax16(BlockOfCode& code, EmitContext& ctx, IR::In ctx.reg_alloc.DefineValue(code, inst, x); } -template -static void EmitVectorPairedMinMaxLower16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm 
x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements - code.pshuflw(x, x, 0b11'01'10'00); - code.pshuflw(y, y, 0b11'01'10'00); - - // move pairs of even/odd-indexed elements into one register each - - // tmp = x[0, 2], y[0, 2], 0s... - code.movaps(tmp, y); - code.insertps(tmp, x, 0b01001100); - // x = x[1, 3], y[1, 3], 0s... - code.insertps(x, y, 0b00011100); - - (code.*fn)(x, tmp); - - ctx.reg_alloc.DefineValue(code, inst, x); -} - -static void EmitVectorPairedMinMaxLower32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - // tmp = x[1], y[1], 0, 0 - code.movaps(tmp, y); - code.insertps(tmp, x, 0b01001100); - // x = x[0], y[0], 0, 0 - code.insertps(x, y, 0b00011100); - - (code.*fn)(x, tmp); - - ctx.reg_alloc.DefineValue(code, inst, x); -} void EmitX64::EmitVectorPairedMaxS8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.pshufb(x, tmp); + code.pshufb(y, tmp); + code.movaps(tmp, x); + code.shufps(tmp, y, 0b01'00'01'00); + code.shufps(x, y, 0b11'10'11'10); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); - return; - } else if 
(code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Max); - }); - return; + code.pmaxsb(x, tmp); + } else { + auto const a = x; + auto const b = tmp; + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, a); + code.pcmpgtb(c, b); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMax(result, a, b); - }); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) { @@ -2846,9 +2910,9 @@ void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2866,12 +2930,24 @@ void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxU8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxub); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const constant_00ff = code.Const(xword, 0x00FF00FF00FF00FF, 
0x00FF00FF00FF00FF); + code.movdqa(tmp2, constant_00ff); + code.movdqa(tmp3, tmp1); + code.pand(tmp3, tmp2); + code.pand(tmp2, tmp0); + code.packuswb(tmp2, tmp3); + code.psrlw(tmp1, 8); + code.psrlw(tmp0, 8); + code.packuswb(tmp0, tmp1); + code.pmaxub(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) { @@ -2887,9 +2963,9 @@ void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2907,14 +2983,15 @@ void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Min); - }); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMin(result, a, b); - }); + EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& a, const auto& b) { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); 
+ code.pandn(c, b); + code.por(a, c); + }); } } @@ -2925,9 +3002,9 @@ void EmitX64::EmitVectorPairedMinS16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2943,12 +3020,25 @@ void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinU8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pminub); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const constant_00ff = code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF); + code.movdqa(tmp2, tmp1); + code.psrlw(tmp2, 8); + code.movdqa(tmp3, tmp0); + code.psrlw(tmp3, 8); + code.packuswb(tmp3, tmp2); + code.movdqa(tmp2, constant_00ff); + code.pand(tmp1, tmp2); + code.pand(tmp0, tmp2); + code.packuswb(tmp0, tmp1); + code.pminub(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) { @@ -2964,9 +3054,9 @@ void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) { void 
EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2982,41 +3072,88 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMaxLowerS8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); - return; - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Max); - }); - return; + code.punpcklqdq(x, y); + code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.movhlps(y, x); + code.movq(x, x); + code.pmaxsb(x, y); + } else { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.punpcklqdq(x, y); + code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.movhlps(y, x); + code.movq(x, x); + code.movdqa(c, x); + code.pcmpgtb(c, y); + code.pand(x, c); + code.pandn(c, y); + code.por(x, c); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitVectorPairedMaxLowerS16(EmitContext& ctx, IR::Inst* 
inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... + code.insertps(x, y, 0b00011100); + code.pmaxsw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 232); + code.pshuflw(tmp1, tmp1, 216); + code.pshufd(tmp0, tmp0, 231); + code.pshuflw(tmp0, tmp0, 114); + code.pmaxsw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerS32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pmaxsd(x, tmp); + 
ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); + code.pandn(tmp2, tmp1); + code.por(tmp2, tmp0); + code.movq(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerU8(EmitContext& ctx, IR::Inst* inst) { @@ -3031,63 +3168,143 @@ void EmitX64::EmitVectorPairedMaxLowerU8(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMaxLowerU16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... 
+ code.insertps(x, y, 0b00011100); + code.pmaxuw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 232); + code.pshuflw(tmp1, tmp1, 216); + code.pshufd(tmp0, tmp0, 231); + code.pshuflw(tmp0, tmp0, 114); + code.psubusw(tmp0, tmp1); + code.paddw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerU32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pmaxud); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pmaxud(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp0); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp1); + code.pcmpgtd(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp3, tmp0); + code.movq(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - 
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorPairedMinMaxLower8(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); - return; - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Min); + } else { + EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& a, const auto& b) { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); }); - return; } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pminsw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... 
+ code.insertps(x, y, 0b00011100); + code.pminsw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 231); + code.pshuflw(tmp1, tmp1, 114); + code.pshufd(tmp0, tmp0, 232); + code.pshuflw(tmp0, tmp0, 216); + code.pminsw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pminsd); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pminsd(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + code.movq(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerU8(EmitContext& ctx, IR::Inst* inst) { @@ 
-3102,50 +3319,91 @@ void EmitX64::EmitVectorPairedMinLowerU8(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMinLowerU16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pminuw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... + code.insertps(x, y, 0b00011100); + code.pminuw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 231); + code.pshuflw(tmp1, tmp1, 114); + code.pshufd(tmp0, tmp0, 232); + code.pshuflw(tmp0, tmp0, 216); + code.movdqa(tmp2, tmp1); + code.psubusw(tmp2, tmp0); + code.psubw(tmp1, tmp2); + code.movq(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerU32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pminud); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + 
auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pminud(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp0); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp1); + code.pcmpgtd(tmp3, tmp2); + code.pand(tmp1, tmp3); + code.pandn(tmp3, tmp0); + code.por(tmp3, tmp1); + code.movq(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } template static D PolynomialMultiply(T lhs, T rhs) { constexpr size_t bit_size = mcl::bitsizeof; const std::bitset operand(lhs); - D res = 0; - for (size_t i = 0; i < bit_size; i++) { - if (operand[i]) { + for (size_t i = 0; i < bit_size; i++) + if (operand[i]) res ^= rhs << i; - } - } - return res; } void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = 
ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const alternate = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr(code).cvt32(); Xbyak::Label loop; @@ -3183,11 +3441,11 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const alternate = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr(code).cvt32(); Xbyak::Label loop; @@ -3229,8 +3487,8 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorPolynomialMultiplyLong64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::PCLMULQDQ)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.pclmulqdq(xmm_a, xmm_b, 0x00); @@ -3260,7 +3518,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong64(EmitContext& ctx, IR::Inst* ins void 
EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::AVX512BITALG)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpopcntb(data, data); @@ -3271,10 +3529,10 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm low_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm high_a = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const low_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const high_a = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_a, low_a); code.psrlw(high_a, 4); @@ -3303,12 +3561,12 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::GFNI)) { code.gf2p8affineqb(data, code.Const(xword, 0x8040201008040201, 0x8040201008040201), 0); } else { - const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm(code); + auto const high_nibble_reg = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_nibble_reg, code.Const(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.pand(high_nibble_reg, data); code.pxor(data, high_nibble_reg); @@ -3316,7 +3574,7 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { if 
(code.HasHostFeature(HostFeature::SSSE3)) { // High lookup - const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm(code); + auto const high_reversed_reg = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_reversed_reg, code.Const(xword, 0xE060A020C0408000, 0xF070B030D0509010)); code.pshufb(high_reversed_reg, data); @@ -3350,8 +3608,8 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); @@ -3363,13 +3621,13 @@ void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpshufb(data, data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b)); } else if (code.HasHostFeature(HostFeature::SSSE3)) { code.pshufb(data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); code.psrlw(data, 8); @@ -3382,7 +3640,7 @@ void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b10110001); code.pshufhw(data, data, 0b10110001); ctx.reg_alloc.DefineValue(code, inst, data); @@ -3390,13 +3648,13 @@ void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpshufb(data, data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); } else if (code.HasHostFeature(HostFeature::SSSE3)) { code.pshufb(data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); code.psrlw(data, 8); @@ -3410,7 +3668,7 @@ void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b00011011); code.pshufhw(data, data, 0b00011011); @@ -3421,7 +3679,7 @@ void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b01001110); code.pshufhw(data, data, 0b01001110); @@ -3432,8 +3690,8 @@ void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst void 
EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements code.pshufd(temp, data, 0b01'00'11'10); @@ -3453,8 +3711,8 @@ void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; if (code.HasHostFeature(HostFeature::SSSE3)) { code.pxor(temp, temp); @@ -3484,8 +3742,8 @@ void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements(reversed) code.pshufd(temp, data, 0b00'01'10'11); @@ -3508,8 +3766,8 @@ void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements code.pshufd(temp, data, 0b01'00'11'10); @@ -3524,8 +3782,8 @@ void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) { void 
EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const operand = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); const u8 shift_amount = args[1].GetImmediateU8(); ASSERT(shift_amount % 32 == 0); const u8 shuffle_imm = std::rotr(0b11100100, shift_amount / 32 * 2); @@ -3538,12 +3796,12 @@ void EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: { - const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm(code); + auto const vec_128 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(vec_128, code.Const(xword, 0x8080808080808080, 0x8080808080808080)); code.paddb(a, vec_128); @@ -3553,7 +3811,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I break; } case 16: { - const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm(code); + auto const vec_32768 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(vec_32768, code.Const(xword, 0x8000800080008000, 0x8000800080008000)); code.paddw(a, vec_32768); @@ -3563,7 +3821,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I break; } case 32: { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp1, a); code.por(a, b); @@ -3603,9 +3861,9 @@ static void EmitVectorRoundingHalvingAddUnsigned(size_t esize, 
EmitContext& ctx, case 32: { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp1, a); @@ -3669,18 +3927,18 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I static_assert(esize == 32 || esize == 64); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); // positive values of b are left shifts, while negative values are (positive) rounding right shifts // only the lowest byte of each element is read as the shift amount // conveniently, the behavior of bit shifts greater than element width is the same in NEON and SSE/AVX - filled with zeros - const Xbyak::Xmm shift_amount = ctx.reg_alloc.ScratchXmm(code); + auto const shift_amount = ctx.reg_alloc.ScratchXmm(code); code.vpabsb(shift_amount, b); code.vpand(shift_amount, shift_amount, code.BConst(xword, 0xFF)); // if b is positive, do a normal left shift - const Xbyak::Xmm left_shift = ctx.reg_alloc.ScratchXmm(code); + auto const left_shift = ctx.reg_alloc.ScratchXmm(code); ICODE(vpsllv)(left_shift, a, shift_amount); // if b is negative, compute the rounding right shift @@ -3691,7 +3949,7 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I // tmp = (a >> (b - 1)) & 1 // res = (a >> b) + tmp // to add the value of the last bit to be shifted off to the result of the right shift - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); + auto const right_shift 
= ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, code.BConst(xword, 1)); // find value of last bit to be shifted off @@ -3775,12 +4033,12 @@ void EmitX64::EmitVectorRoundingShiftLeftU64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pmovsxbw(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.pxor(result, result); code.punpcklbw(result, a); code.psraw(result, 8); @@ -3791,12 +4049,12 @@ void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pmovsxwd(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.pxor(result, result); code.punpcklwd(result, a); code.psrad(result, 16); @@ -3806,12 +4064,12 @@ void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovsxdq(a, a); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movaps(tmp, a); code.psrad(tmp, 31); @@ -3824,7 +4082,7 @@ void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg64 gpr_tmp = ctx.reg_alloc.ScratchGpr(code); code.movq(gpr_tmp, data); @@ -3833,7 +4091,7 @@ void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { code.pinsrq(data, gpr_tmp, 1); } else { - const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(xmm_tmp, gpr_tmp); code.punpcklqdq(data, xmm_tmp); @@ -3844,9 +4102,9 @@ void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); // only signed 16-bit min/max are available below SSE4.1 if (code.HasHostFeature(HostFeature::SSE41) || esize == 16) { @@ -3912,11 +4170,11 @@ void EmitX64::EmitVectorSignedMultiply16(EmitContext& ctx, IR::Inst* inst) { const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); if (upper_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhw(result, x, y); } else { @@ -3928,7 +4186,7 @@ void EmitX64::EmitVectorSignedMultiply16(EmitContext& ctx, IR::Inst* inst) { } if (lower_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmullw(result, x, y); } else { @@ -3946,9 +4204,9 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (lower_inst && !upper_inst && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(result, x, y); @@ -3957,16 +4215,16 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(lower_result, x, y); ctx.reg_alloc.DefineValue(code, 
lower_inst, lower_result); } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmuldq(result, x, y); code.vpsrlq(x, x, 32); @@ -3978,12 +4236,12 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { return; } - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const sign_correction = ctx.reg_alloc.ScratchXmm(code); + auto const upper_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); // calculate sign correction code.movdqa(tmp, x); @@ -4026,7 +4284,7 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); // SSE absolute value functions return an unsigned result @@ -4038,21 +4296,34 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo // or shift in sign bits to create a mask of (msb == 1 ? 
-1 : 0), then add to the result vector switch (esize) { case 8: { - VectorAbs8(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsb(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubb(temp, data); + code.pminub(data, temp); + } code.pmovmskb(bit, data); - code.pminub(data, code.BConst<8>(xword, 0x7F)); break; } case 16: { - VectorAbs16(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsw(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubw(temp, data); + code.pmaxsw(data, temp); + } code.pmovmskb(bit, data); code.and_(bit, 0xAAAA); // toggle mask bits that aren't the msb of an int16 to 0 if (code.HasHostFeature(HostFeature::SSE41)) { code.pminuw(data, code.BConst<16>(xword, 0x7FFF)); } else { - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; code.movdqa(tmp, data); code.psraw(data, 15); code.paddw(data, tmp); @@ -4060,13 +4331,21 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo break; } case 32: { - VectorAbs32(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsd(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(temp, data); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubd(data, temp); + } code.movmskps(bit, data); if (code.HasHostFeature(HostFeature::SSE41)) { code.pminud(data, code.BConst<32>(xword, 0x7FFFFFFF)); } else { - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; code.movdqa(tmp, data); code.psrad(data, 31); code.paddd(data, tmp); @@ -4074,10 +4353,18 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo break; } case 64: { - VectorAbs64(code, ctx, data); + if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { + code.vpabsq(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(temp, data, 
0b11110101); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubq(data, temp); + } code.movmskpd(bit, data); - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; if (code.HasHostFeature(HostFeature::SSE42)) { // create a -1 mask if msb is set code.pxor(tmp, tmp); @@ -4119,13 +4406,13 @@ template static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); code.movdqa(xmm0, y); ctx.reg_alloc.Release(y); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); switch (bit_width) { case 8: @@ -4182,7 +4469,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC switch (bit_width) { case 8: if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqb(tmp2, tmp2); code.pxor(tmp, tmp); code.vpblendvb(xmm0, tmp, tmp2, xmm0); @@ -4262,10 +4549,10 @@ void EmitX64::EmitVectorSignedSaturatedAccumulateUnsigned64(EmitContext& ctx, IR template static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm upper_tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm lower_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = 
ctx.reg_alloc.UseXmm(code, args[1]); + auto const upper_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const lower_tmp = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhw(upper_tmp, x, y); @@ -4284,7 +4571,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC ctx.reg_alloc.Release(x); ctx.reg_alloc.Release(y); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { if constexpr (is_rounding) { @@ -4334,10 +4621,10 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm odds = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm even = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const odds = ctx.reg_alloc.ScratchXmm(code); + auto const even = ctx.reg_alloc.ScratchXmm(code); code.vpmuldq(odds, x, y); code.vpsrlq(x, x, 32); @@ -4350,7 +4637,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpaddq(odds, odds, odds); code.vpaddq(even, even, even); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if constexpr (is_rounding) { code.vmovdqa(result, code.Const(xword, 0x0000000080000000, 0x0000000080000000)); @@ -4361,7 +4648,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpsrlq(result, odds, 32); code.vblendps(result, result, even, 0b1010); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 bit = 
ctx.reg_alloc.ScratchGpr(code).cvt32(); code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); @@ -4376,11 +4663,11 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& return; } - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const sign_correction = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); // calculate sign correction code.movdqa(tmp, x); @@ -4439,8 +4726,8 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHighRounding32(EmitContex void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.punpcklwd(x, x); code.punpcklwd(y, y); @@ -4465,8 +4752,8 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if 
(code.HasHostFeature(HostFeature::AVX)) { code.vpmovsxdq(x, x); @@ -4517,10 +4804,10 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign = ctx.reg_alloc.ScratchXmm(code); + auto const src = ctx.reg_alloc.UseXmm(code, args[0]); + auto const dest = ctx.reg_alloc.ScratchXmm(code); + auto const reconstructed = ctx.reg_alloc.ScratchXmm(code); + auto const sign = ctx.reg_alloc.ScratchXmm(code); code.movdqa(dest, src); code.pxor(xmm0, xmm0); @@ -4577,9 +4864,9 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToSigned64(EmitContext& ctx, IR::In static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(code); + auto const src = ctx.reg_alloc.UseXmm(code, args[0]); + auto const dest = ctx.reg_alloc.ScratchXmm(code); + auto const reconstructed = ctx.reg_alloc.ScratchXmm(code); code.movdqa(dest, src); code.pxor(xmm0, xmm0); @@ -4647,9 +4934,9 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned64(EmitContext& ctx, IR:: static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const 
data = ctx.reg_alloc.UseXmm(code, args[0]); + auto const zero = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Address mask = [esize, &code] { switch (esize) { case 8: @@ -4665,7 +4952,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo } }(); - const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const auto& y) { + const auto vector_equality = [esize, &code](auto const& x, const auto& y) { switch (esize) { case 8: code.pcmpeqb(x, y); @@ -4810,33 +5097,23 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* i EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft); } -template> +template static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray& dst, const VectorArray& data, u8 shift_amount) { + using U = std::make_unsigned_t; static_assert(std::is_signed_v, "T must be signed."); - bool qc_flag = false; for (size_t i = 0; i < dst.size(); i++) { - const T element = data[i]; - const T shift = static_cast(shift_amount); - - if (element == 0) { - dst[i] = 0; - } else if (element < 0) { - dst[i] = 0; - qc_flag = true; - } else { - const U shifted = static_cast(element) << static_cast(shift); - const U shifted_test = shifted >> static_cast(shift); - - if (shifted_test != static_cast(element)) { - dst[i] = static_cast((std::numeric_limits::max)()); - qc_flag = true; - } else { - dst[i] = shifted; - } - } + auto const element = data[i]; + auto const shifted = U(element) << U(T(shift_amount)); + auto const shifted_test = shifted >> U(T(shift_amount)); + auto result = 0; + if (element > 0 && shifted_test != U(element)) + result = T((std::numeric_limits::max)()); + if (element > 0 && shifted_test == U(element)) + result = shifted; + qc_flag |= element < 0 || (element > 0 && shifted_test != U(element)); + dst[i] = result; } - return qc_flag; } @@ -4849,7 +5126,97 @@ void 
EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned16(EmitContext& ctx, IR: } void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned32(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const imm8 = args[1].GetImmediateU8(); + if (code.HasHostFeature(HostFeature::AVX2)) { + auto const tmp_flag = ctx.reg_alloc.ScratchGpr(code); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + if (imm8 == 0) { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpshufd(tmp1, tmp0, 85); + code.vpshufd(tmp2, tmp0, 238); + code.vpor(tmp1, tmp1, tmp2); + code.vpshufd(tmp2, tmp0, 255); + code.vpor(tmp2, tmp2, tmp0); + code.vpor(tmp1, tmp1, tmp2); + code.vmovd(tmp_flag.cvt32(), tmp1); + code.shr(tmp_flag.cvt32(), 31); + code.vpxor(tmp1, tmp1, tmp1); + code.vpmaxsd(tmp0, tmp0, tmp1); + } else { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const cmp_value = u32(1ULL << 31) >> (imm8 - 1); + code.vpshufd(tmp1, tmp0, 238); + code.vpor(tmp1, tmp1, tmp0); + code.vpshufd(tmp2, tmp1, 85); + code.vpor(tmp1, tmp1, tmp2); + code.vmovd(tmp_flag.cvt32(), tmp1); + code.cmp(tmp_flag.cvt32(), cmp_value); + code.vpslld(tmp1, tmp0, imm8); + code.vpbroadcastd(tmp2, code.Const(dword, cmp_value - 2)); + code.vpbroadcastd(tmp3, code.Const(dword, cmp_value - 1)); + code.vpcmpgtd(tmp3, tmp0, tmp3); + code.vpcmpeqd(tmp4, tmp4, tmp4); + code.vpaddd(tmp0, tmp0, tmp4); + code.vpminud(tmp2, tmp0, tmp2); + code.vpcmpeqd(tmp0, tmp0, tmp2); + code.vblendvps(tmp0, tmp3, tmp1, tmp0); + code.setae(tmp_flag.cvt8()); + } + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp_flag.cvt8()); + 
ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const tmp_flag = ctx.reg_alloc.ScratchGpr(code); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + if (imm8 == 0) { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(tmp1, tmp0, 85); + code.pshufd(tmp2, tmp0, 238); + code.por(tmp2, tmp1); + code.pshufd(tmp1, tmp0, 255); + code.por(tmp1, tmp0); + code.por(tmp1, tmp2); + code.movd(tmp_flag.cvt32(), tmp1); + code.shr(tmp_flag.cvt32(), 31); + code.pxor(tmp1, tmp1); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); + } else { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + u64 const cmp_value = u64(1ULL << 31) >> (imm8 - 1); + u64 const cmp_one = cmp_value - 1; + u64 const cmp_add = (cmp_value - 2) + 0x80000000; + code.pshufd(tmp1, tmp0, 238); + code.por(tmp1, tmp0); + code.pshufd(tmp2, tmp1, 85); + code.por(tmp2, tmp1); + code.movd(tmp_flag.cvt32(), tmp2); + code.cmp(tmp_flag.cvt32(), cmp_value); + code.movdqa(tmp1, tmp0); + code.pslld(tmp1, imm8); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, code.Const(xword, cmp_one | (cmp_one << 32), cmp_one | (cmp_one << 32))); + code.pcmpeqd(tmp3, tmp3); + code.paddd(tmp0, tmp3); + code.pxor(tmp0, code.Const(xword, 0x80000000'80000000, 0x80000000'80000000)); + code.pcmpgtd(tmp0, code.Const(xword, cmp_add | (cmp_add << 32), cmp_add | (cmp_add << 32))); + code.pand(tmp2, tmp0); + code.pandn(tmp0, tmp1); + code.por(tmp0, tmp2); + code.setae(tmp_flag.cvt8()); + } + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp_flag.cvt8()); + ctx.reg_alloc.DefineValue(code, inst, tmp0); +// EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned); + } } void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned64(EmitContext& ctx, 
IR::Inst* inst) { @@ -4887,7 +5254,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { const bool is_defaults_zero = inst->GetArg(0).IsZero(); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI)) { - const Xbyak::Xmm indicies = table_size <= 2 ? ctx.reg_alloc.UseXmm(code, args[2]) : ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = table_size <= 2 ? ctx.reg_alloc.UseXmm(code, args[2]) : ctx.reg_alloc.UseScratchXmm(code, args[2]); const u64 index_count = mcl::bit::replicate_element(static_cast(table_size * 8)); @@ -4895,43 +5262,43 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { switch (table_size) { case 1: { - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 2: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm0); ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 3: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[2]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm0, xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); @@ -4939,17 +5306,17 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { break; } case 4: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); code.vpunpcklqdq(xmm_table1, xmm_table1, xmm_table1_upper); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm0, 
xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); @@ -4972,9 +5339,9 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { }; if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.xorps(result, result); code.movsd(result, xmm_table0); @@ -4986,9 +5353,9 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); @@ -4999,12 +5366,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41) && table_size <= 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm defaults = 
ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); if (table_size == 2) { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } @@ -5023,12 +5390,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41) && is_defaults_zero) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } @@ -5037,7 +5404,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { code.punpcklqdq(xmm_table1, xmm0); } else { ASSERT(table_size == 4); - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.punpcklqdq(xmm_table1, xmm_table1_upper); ctx.reg_alloc.Release(xmm_table1_upper); } @@ -5058,18 +5425,18 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm indicies = 
ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } if (table_size == 4) { - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.punpcklqdq(xmm_table1, xmm_table1_upper); ctx.reg_alloc.Release(xmm_table1_upper); } @@ -5098,37 +5465,31 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { const u32 stack_space = static_cast(6 * 8); ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(code, table[i]); + auto const table_value = ctx.reg_alloc.UseXmm(code, table[i]); code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value); ctx.reg_alloc.Release(table_value); } - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); - code.lea(code.ABI_PARAM1, 
ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 4 * 8]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 5 * 8]); code.mov(code.ABI_PARAM4.cvt32(), table_size); code.movq(qword[code.ABI_PARAM2], defaults); code.movq(qword[code.ABI_PARAM3], indicies); - - code.CallLambda( - [](const HalfVectorArray* table, HalfVectorArray& result, const HalfVectorArray& indicies, size_t table_size) { - for (size_t i = 0; i < result.size(); ++i) { - const size_t index = indicies[i] / table[0].size(); - const size_t elem = indicies[i] % table[0].size(); - if (index < table_size) { - result[i] = table[index][elem]; - } - } - }); - + code.CallLambda([](const HalfVectorArray* table, HalfVectorArray& result, const HalfVectorArray& indicies, size_t table_size) { + for (size_t i = 0; i < result.size(); ++i) { + const size_t index = indicies[i] / table[0].size(); + const size_t elem = indicies[i] % table[0].size(); + if (index < table_size) + result[i] = table[index][elem]; + } + }); code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]); ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(code, inst, result); } @@ -5142,14 +5503,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector; if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 4) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); code.vpcmpub(k2, indicies, code.BConst<8>(xword, 4 * 16), CmpInt::LessThan); // Handle vector-table 0,1 - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, 
table[1]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpermi2b(indicies | k1, xmm_table0, xmm_table1); @@ -5157,8 +5518,8 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.Release(xmm_table1); // Handle vector-table 2,3 - const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); - const Xbyak::Xmm xmm_table3 = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table3 = ctx.reg_alloc.UseXmm(code, table[3]); code.kandnw(k1, k1, k2); code.vpermi2b(indicies | k1, xmm_table2, xmm_table3); @@ -5167,19 +5528,19 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vmovdqu8(indicies | k2 | T_z, indicies); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vmovdqu8(defaults | k2, indicies); ctx.reg_alloc.DefineValue(code, inst, defaults); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 3) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); code.vpcmpub(k2, indicies, code.BConst<8>(xword, 3 * 16), CmpInt::LessThan); // Handle vector-table 0,1 - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpermi2b(indicies | k1, xmm_table0, xmm_table1); @@ -5187,7 +5548,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* 
inst) { ctx.reg_alloc.Release(xmm_table1); // Handle vector-table 2 - const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); code.kandnw(k1, k1, k2); code.vpermb(indicies | k1, indicies, xmm_table2); @@ -5196,14 +5557,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vmovdqu8(indicies | k2 | T_z, indicies); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vmovdqu8(defaults | k2, indicies); ctx.reg_alloc.DefineValue(code, inst, defaults); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); @@ -5211,36 +5572,36 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vpermi2b(indicies | k1 | T_z, xmm_table0, xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm_table0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 1) { - const Xbyak::Xmm indicies = 
ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 1 * 16), CmpInt::LessThan); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } } else if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); code.pshufb(xmm_table0, indicies); ctx.reg_alloc.DefineValue(code, inst, xmm_table0); } else if (code.HasHostFeature(HostFeature::SSE41) && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpaddusb(xmm0, indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); @@ -5253,9 +5614,9 @@ void 
EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_table0); } else if (code.HasHostFeature(HostFeature::SSE41) && is_defaults_zero && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[1]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpaddusb(xmm0, indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); @@ -5271,14 +5632,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const masked = ctx.reg_alloc.ScratchXmm(code); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); + auto const xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); const Xbyak::Opmask table_mask = k1; const u64 table_index = mcl::bit::replicate_element(i * 16); @@ -5295,15 +5656,15 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::SSE41)) { - const 
Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const masked = ctx.reg_alloc.ScratchXmm(code); code.movaps(masked, code.Const(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.pand(masked, indicies); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); + auto const xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); const u64 table_index = mcl::bit::replicate_element(i * 16); @@ -5327,13 +5688,13 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { const u32 stack_space = static_cast((table_size + 2) * 16); ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(code, table[i]); + auto const table_value = ctx.reg_alloc.UseXmm(code, table[i]); code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value); ctx.reg_alloc.Release(table_value); } - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); @@ -5360,8 +5721,8 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); if (!part) { @@ -5379,8 +5740,8 @@ void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); if (!part) { @@ -5398,8 +5759,8 @@ void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); code.shufps(lower, upper, !part ? 0b10001000 : 0b11011101); @@ -5411,8 +5772,8 @@ void EmitX64::EmitVectorTranspose32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); code.shufpd(lower, upper, !part ? 
0b00 : 0b11); @@ -5420,89 +5781,87 @@ void EmitX64::EmitVectorTranspose64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, lower); } -static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { + +void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - - switch (esize) { - case 8: { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminub(tmp2, tmp0, tmp1); + code.vpmaxub(tmp0, tmp0, tmp1); + code.vpsubb(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(temp, x); code.psubusb(temp, y); code.psubusb(y, x); code.por(temp, y); - break; + ctx.reg_alloc.DefineValue(code, inst, temp); } - case 16: { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); +} +void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminuw(tmp2, tmp0, tmp1); + code.vpmaxuw(tmp0, tmp0, tmp1); + code.vpsubw(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto 
const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(temp, x); code.psubusw(temp, y); code.psubusw(y, x); code.por(temp, y); - break; + ctx.reg_alloc.DefineValue(code, inst, temp); } - case 32: - // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 - if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - - code.movdqa(temp, x); - code.pminud(x, y); - code.pmaxud(temp, y); - code.psubd(temp, x); - } else { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - if (ctx.HasOptimization(OptimizationFlag::CodeSpeed)) { - // About 45 bytes - const Xbyak::Xmm temp_x = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm temp_y = ctx.reg_alloc.ScratchXmm(code); - code.pcmpeqd(temp, temp); - code.pslld(temp, 31); - code.movdqa(temp_x, x); - code.movdqa(temp_y, y); - code.paddd(temp_x, x); - code.paddd(temp_y, y); - code.pcmpgtd(temp_y, temp_x); - code.psubd(x, y); - code.pandn(temp, temp_y); - code.pxor(x, y); - code.psubd(x, y); - } else { - // Smaller code size - about 36 bytes - code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); - code.pxor(x, temp); - code.pxor(y, temp); - code.movdqa(temp, x); - code.psubd(temp, y); - code.pcmpgtd(y, x); - code.psrld(y, 1); - code.pxor(temp, y); - code.psubd(temp, y); - } - } - break; - } - - ctx.reg_alloc.DefineValue(code, inst, temp); -} - -void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { - EmitVectorUnsignedAbsoluteDifference(8, ctx, inst, code); -} - -void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) { - 
EmitVectorUnsignedAbsoluteDifference(16, ctx, inst, code); } void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) { - EmitVectorUnsignedAbsoluteDifference(32, ctx, inst, code); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminud(tmp2, tmp0, tmp1); + code.vpmaxud(tmp0, tmp0, tmp1); + code.vpsubd(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else if (code.HasHostFeature(HostFeature::SSE41)) { + // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 + auto const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + code.movdqa(temp, x); + code.pminud(x, y); + code.pmaxud(temp, y); + code.psubd(temp, x); + ctx.reg_alloc.DefineValue(code, inst, temp); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp3); + code.psubd(tmp0, tmp1); + code.pxor(tmp0, tmp2); + code.psubd(tmp2, tmp0); + // tmp2 = gt_mask - ((args[0] - args[1]) ^ gt_mask) == |args[0] - args[1]| per lane; the result lives in tmp2 + ctx.reg_alloc.DefineValue(code, inst, tmp2); + } } void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { @@ -5510,11 +5869,11 @@ void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); auto args 
= ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); if (upper_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhuw(result, x, y); } else { @@ -5526,7 +5885,7 @@ void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { } if (lower_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmullw(result, x, y); } else { @@ -5544,24 +5903,24 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (lower_inst && !upper_inst && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(result, x, y); ctx.reg_alloc.DefineValue(code, lower_inst, result); } else if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(lower_result, x, y); ctx.reg_alloc.DefineValue(code, lower_inst, lower_result); } - 
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmuludq(result, x, y); code.vpsrlq(x, x, 32); @@ -5571,11 +5930,11 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, upper_inst, result); } else { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; - const Xbyak::Xmm lower_result = lower_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; + auto const lower_result = lower_inst ? 
ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; // calculate unsigned multiply code.movdqa(tmp, x); @@ -5792,11 +6151,11 @@ void EmitX64::EmitVectorUnsignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxbw(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklbw(a, zeros); } @@ -5805,11 +6164,11 @@ void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxwd(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklwd(a, zeros); } @@ -5818,11 +6177,11 @@ void EmitX64::EmitVectorZeroExtend16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxdq(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpckldq(a, zeros); } @@ -5831,8 +6190,8 @@ void EmitX64::EmitVectorZeroExtend32(EmitContext& ctx, IR::Inst* inst) { void 
EmitX64::EmitVectorZeroExtend64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklqdq(a, zeros); ctx.reg_alloc.DefineValue(code, inst, a); @@ -5840,7 +6199,7 @@ void EmitX64::EmitVectorZeroExtend64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.movq(a, a); // TODO: !IsLastUse @@ -5848,7 +6207,7 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Xmm a = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.ScratchXmm(code); code.pxor(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } diff --git a/src/dynarmic/src/dynarmic/interface/A32/a32.h b/src/dynarmic/src/dynarmic/interface/A32/a32.h index bc30b59c1e..00e9e045ef 100644 --- a/src/dynarmic/src/dynarmic/interface/A32/a32.h +++ b/src/dynarmic/src/dynarmic/interface/A32/a32.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. 
@@ -14,8 +14,8 @@ #include #include -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/interface/halt_reason.h" +#include "config.h" +#include "../halt_reason.h" namespace Dynarmic { namespace A32 { diff --git a/src/dynarmic/src/dynarmic/interface/A32/config.h b/src/dynarmic/src/dynarmic/interface/A32/config.h index 5a97fb69f3..9f1f5cc0cd 100644 --- a/src/dynarmic/src/dynarmic/interface/A32/config.h +++ b/src/dynarmic/src/dynarmic/interface/A32/config.h @@ -14,9 +14,9 @@ #include #include -#include "dynarmic/frontend/A32/translate/translate_callbacks.h" -#include "dynarmic/interface/A32/arch_version.h" -#include "dynarmic/interface/optimization_flags.h" +#include "../../frontend/A32/translate/translate_callbacks.h" +#include "arch_version.h" +#include "../optimization_flags.h" namespace Dynarmic { class ExclusiveMonitor; diff --git a/src/dynarmic/src/dynarmic/interface/A64/a64.h b/src/dynarmic/src/dynarmic/interface/A64/a64.h index e8d2352702..86c24b02a1 100644 --- a/src/dynarmic/src/dynarmic/interface/A64/a64.h +++ b/src/dynarmic/src/dynarmic/interface/A64/a64.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. 
@@ -15,8 +15,8 @@ #include #include -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/interface/halt_reason.h" +#include "config.h" +#include "../halt_reason.h" namespace Dynarmic { namespace A64 { diff --git a/src/dynarmic/src/dynarmic/interface/A64/config.h b/src/dynarmic/src/dynarmic/interface/A64/config.h index 83c1593fd8..f6d340c47f 100644 --- a/src/dynarmic/src/dynarmic/interface/A64/config.h +++ b/src/dynarmic/src/dynarmic/interface/A64/config.h @@ -14,7 +14,7 @@ #include #include -#include "dynarmic/interface/optimization_flags.h" +#include "../optimization_flags.h" namespace Dynarmic { class ExclusiveMonitor; diff --git a/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h b/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h index 566743c767..9315563888 100644 --- a/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h +++ b/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD @@ -11,7 +14,7 @@ #include #include -#include +#include "dynarmic/src/dynarmic/common/spin_lock.h" namespace Dynarmic { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 4d4484e53e..e85986ea5a 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -415,6 +415,105 @@ TEST_CASE("A64: URSHL", "[a64]") { CHECK(jit.GetVector(9) == Vector{0x0000000000000002, 0x12db8b8280e0ba}); } +TEST_CASE("A64: SQSHLU", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SQSHLU(V8.B16(), V0.B16(), 1); + code.SQSHLU(V9.H8(), V1.H8(), 2); + code.SQSHLU(V10.S4(), V2.S4(), 28); + code.SQSHLU(V11.D2(), V3.D2(), 4); + code.SQSHLU(V12.S4(), V0.S4(), 1); + code.SQSHLU(V13.S4(), V1.S4(), 3); + code.SQSHLU(V14.S4(), V2.S4(), 0); + code.SQSHLU(V15.S4(), V3.S4(), 0); + + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffffffffffff, 0x96dc5c140705cd04}); + + env.ticks_left = env.code_mem.size(); + CheckedRun([&]() { jit.Run(); }); + + CHECK(jit.GetVector(8) == Vector{0x3000d4d4, 0xfe0000000076009e}); + CHECK(jit.GetVector(9) == Vector{0x2c0000003c, 0}); + CHECK(jit.GetVector(10) == Vector{0x10000000'ffffffff, 0xffffffff'ffffffff}); + CHECK(jit.GetVector(11) == Vector{0, 0}); + CHECK(jit.GetVector(12) == Vector{0x3174d4d4, 0xfffffffe00000000}); + CHECK(jit.GetVector(13) == Vector{0x5800000078, 0}); + CHECK(jit.GetVector(14) == Vector{0x1000000ff, 0x100000007f}); + CHECK(jit.GetVector(15) == Vector{0, 0x705cd04}); +} + +TEST_CASE("A64: SMIN", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = 
&env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SMIN(V8.B16(), V0.B16(), V3.B16()); + code.SMIN(V9.H8(), V1.H8(), V2.H8()); + code.SMIN(V10.S4(), V2.S4(), V3.S4()); + code.SMIN(V11.S4(), V3.S4(), V3.S4()); + code.SMIN(V12.S4(), V0.S4(), V3.S4()); + code.SMIN(V13.S4(), V1.S4(), V2.S4()); + code.SMIN(V14.S4(), V2.S4(), V1.S4()); + code.SMIN(V15.S4(), V3.S4(), V0.S4()); + + jit.SetPC(0); + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffff'ffffffff, 0x96dc5c14'0705cd04}); + + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + + REQUIRE(jit.GetVector(8) == Vector{0xffffffffffbaffff, 0x96dcffff94059504}); + REQUIRE(jit.GetVector(9) == Vector{0x10000000f, 0xffffffffffffffff}); + REQUIRE(jit.GetVector(10) == Vector{0xffffffffffffffff, 0x96dc5c140000007f}); +} + +TEST_CASE("A64: SMINP", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SMINP(V8.B16(), V0.B16(), V3.B16()); + code.SMINP(V9.H8(), V1.H8(), V2.H8()); + code.SMINP(V10.S4(), V2.S4(), V1.S4()); + code.SMINP(V11.S4(), V3.S4(), V3.S4()); + code.SMINP(V12.S4(), V0.S4(), V3.S4()); + code.SMINP(V13.S4(), V1.S4(), V2.S4()); + code.SMINP(V14.S4(), V2.S4(), V1.S4()); + code.SMINP(V15.S4(), V3.S4(), V0.S4()); + + jit.SetPC(0); + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffff'ffffffff, 0x96dc5c14'0705cd04}); + + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + + REQUIRE(jit.GetVector(8) == Vector{0xffff9495ffffba6a, 
0x961405cdffffffff}); + REQUIRE(jit.GetVector(9) == Vector{0xffffffff00000000, 0}); + REQUIRE(jit.GetVector(10) == Vector{0x1000000001, 0xffffffff0000000b}); + REQUIRE(jit.GetVector(11) == Vector{0x96dc5c14ffffffff, 0x96dc5c14ffffffff}); + REQUIRE(jit.GetVector(12) == Vector{0x943b954fffffffff, 0x96dc5c14ffffffff}); + REQUIRE(jit.GetVector(13) == Vector{0xffffffff0000000b, 0x1000000001}); + REQUIRE(jit.GetVector(14) == Vector{0x1000000001, 0xffffffff0000000b}); + REQUIRE(jit.GetVector(15) == Vector{0x96dc5c14ffffffff, 0x943b954fffffffff}); +} + TEST_CASE("A64: XTN", "[a64]") { A64TestEnv env; A64::UserConfig jit_user_config{}; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index ca44dce593..83b2849b73 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -1,6 +1,5 @@ # SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project # SPDX-License-Identifier: GPL-3.0-or-later -include(TargetArchitectureSpecificSources) add_executable(dynarmic_tests fp/FPToFixed.cpp @@ -50,7 +49,7 @@ endif() if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) - target_architecture_specific_sources(dynarmic_tests "x86_64" + target_sources(dynarmic_tests PRIVATE x64_cpu_info.cpp native/preserve_xmm.cpp ) diff --git a/src/frontend_common/CMakeLists.txt b/src/frontend_common/CMakeLists.txt index a6d580cb18..f9b3805939 100644 --- a/src/frontend_common/CMakeLists.txt +++ b/src/frontend_common/CMakeLists.txt @@ -22,8 +22,6 @@ if (ENABLE_UPDATE_CHECKER) target_sources(frontend_common PRIVATE update_checker.cpp update_checker.h) - - target_compile_definitions(frontend_common PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT) target_link_libraries(frontend_common PRIVATE OpenSSL::SSL OpenSSL::Crypto) endif() diff --git a/src/frontend_common/update_checker.cpp b/src/frontend_common/update_checker.cpp index 75b1dc00cc..3e5a832ba0 100644 --- a/src/frontend_common/update_checker.cpp +++ 
b/src/frontend_common/update_checker.cpp @@ -13,7 +13,7 @@ #include "common/scm_rev.h" #include "update_checker.h" -#include +#include "common/httplib.h" #ifdef YUZU_BUNDLED_OPENSSL #include diff --git a/src/ios/AdvancedSettingsView.swift b/src/ios/AdvancedSettingsView.swift new file mode 100644 index 0000000000..9c40af442c --- /dev/null +++ b/src/ios/AdvancedSettingsView.swift @@ -0,0 +1,77 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import UniformTypeIdentifiers + +struct AdvancedSettingsView: View { + @AppStorage("exitgame") var exitgame: Bool = false + @AppStorage("ClearBackingRegion") var kpagetable: Bool = false + @AppStorage("WaitingforJIT") var waitingJIT: Bool = false + @AppStorage("cangetfullpath") var canGetFullPath: Bool = false + @AppStorage("onscreenhandheld") var onscreenjoy: Bool = false + var body: some View { + ScrollView { + Rectangle() + .fill(Color(uiColor: UIColor.secondarySystemBackground)) + .cornerRadius(10) + .frame(maxWidth: .infinity, minHeight: 50, maxHeight: 50) + .overlay() { + HStack { + Toggle("Exit Game Button", isOn: $exitgame) + .padding() + } + } + Text("This is very unstable and can lead to game freezing and overall bad performance after you exit a game") + .padding(.bottom) + .font(.footnote) + .foregroundColor(.gray) + Rectangle() + .fill(Color(uiColor: UIColor.secondarySystemBackground)) + .cornerRadius(10) + .frame(maxWidth: .infinity, minHeight: 50, maxHeight: 50) + .overlay() { + HStack { + Toggle("Memory Usage Increase", isOn: $kpagetable) + .padding() + } + } + Text("This makes games way more stable but a lot of games will crash as you will run out of Memory way quicker. 
(Don't Enable this on devices with less than 8GB of memory as most games will crash)") + .padding(.bottom) + .font(.footnote) + .foregroundColor(.gray) + + Rectangle() + .fill(Color(uiColor: UIColor.secondarySystemBackground)) + .cornerRadius(10) + .frame(maxWidth: .infinity, minHeight: 50, maxHeight: 50) + .overlay() { + HStack { + Toggle("Check for Booting OS", isOn: $canGetFullPath) + .padding() + } + } + Text("If you do not have the necessary files for Booting the Switch OS, it will just crash almost instantly.") + .padding(.bottom) + .font(.footnote) + .foregroundColor(.gray) + + Rectangle() + .fill(Color(uiColor: UIColor.secondarySystemBackground)) + .cornerRadius(10) + .frame(maxWidth: .infinity, minHeight: 50, maxHeight: 50) + .overlay() { + HStack { + Toggle("Set OnScreen Controls to Handheld", isOn: $onscreenjoy) + .padding() + } + } + Text("You need in Core Settings to set \"use_docked_mode = 0\"") + .padding(.bottom) + .font(.footnote) + .foregroundColor(.gray) + } + } +} diff --git a/src/ios/Air.swift b/src/ios/Air.swift new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/ios/AirPlay.swift b/src/ios/AirPlay.swift new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/ios/AppIconProvider.swift b/src/ios/AppIconProvider.swift new file mode 100644 index 0000000000..61ebd4df64 --- /dev/null +++ b/src/ios/AppIconProvider.swift @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import Foundation + +enum AppIconProvider { + static func appIcon(in bundle: Bundle = .main) -> String { + guard let icons = bundle.object(forInfoDictionaryKey: "CFBundleIcons") as? [String: Any], + let primaryIcon = icons["CFBundlePrimaryIcon"] as? [String: Any], + let iconFiles = primaryIcon["CFBundleIconFiles"] as? 
[String], + let iconFileName = iconFiles.last else { + print("Could not find icons in bundle") + return "" + } + return iconFileName + } +} diff --git a/src/ios/AppUI-Bridging-Header.h b/src/ios/AppUI-Bridging-Header.h new file mode 100644 index 0000000000..00fadaa6dc --- /dev/null +++ b/src/ios/AppUI-Bridging-Header.h @@ -0,0 +1,11 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#ifndef AppUI_Bridging_Header_h +#define AppUI_Bridging_Header_h + +#import "AppUIObjC.h" + +#endif /* AppUI_Bridging_Header_h */ diff --git a/src/ios/AppUI.swift b/src/ios/AppUI.swift new file mode 100644 index 0000000000..4aff2db905 --- /dev/null +++ b/src/ios/AppUI.swift @@ -0,0 +1,103 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +import UIKit +import Foundation +import QuartzCore.CAMetalLayer + +public struct AppUI { + public static let shared = AppUI() + fileprivate let appUIObjC = AppUIObjC.shared() + + public func configure(layer: CAMetalLayer, with size: CGSize) { + appUIObjC.configure(layer: layer, with: size) + } + + public func information(for url: URL) -> AppUIInformation { + appUIObjC.gameInformation.information(for: url) + } + + public func insert(game url: URL) { + appUIObjC.insert(game: url) + } + + public func insert(games urls: [URL]) { + appUIObjC.insert(games: urls) + } + + public func bootOS() { + appUIObjC.bootOS() + } + + public func pause() { + appUIObjC.pause() + } + + public func play() { + appUIObjC.play() + } + + public func ispaused() -> Bool { + return appUIObjC.ispaused() + } + + public func FirstFrameShowed() -> Bool { + return appUIObjC.hasfirstfame() + } + + public func canGetFullPath() -> Bool { + return 
appUIObjC.canGetFullPath() + } + + + public func exit() { + appUIObjC.quit() + } + + public func step() { + appUIObjC.step() + } + + public func orientationChanged(orientation: UIInterfaceOrientation, with layer: CAMetalLayer, size: CGSize) { + appUIObjC.orientationChanged(orientation: orientation, with: layer, size: size) + } + + public func touchBegan(at point: CGPoint, for index: UInt) { + appUIObjC.touchBegan(at: point, for: index) + } + + public func touchEnded(for index: UInt) { + appUIObjC.touchEnded(for: index) + } + + public func touchMoved(at point: CGPoint, for index: UInt) { + appUIObjC.touchMoved(at: point, for: index) + } + + public func gyroMoved(x: Float, y: Float, z: Float, accelX: Float, accelY: Float, accelZ: Float, controllerId: Int32, deltaTimestamp: Int32) { + // Calling the Objective-C function with both gyroscope and accelerometer data + appUIObjC.virtualControllerGyro(controllerId, + deltaTimestamp: deltaTimestamp, + gyroX: x, gyroY: y, gyroZ: z, + accelX: accelX, accelY: accelY, accelZ: accelZ) + } + + + public func thumbstickMoved(analog: VirtualControllerAnalogType, x: Float, y: Float, controllerid: Int) { + appUIObjC.thumbstickMoved(analog, x: CGFloat(x), y: CGFloat(y), controllerId: Int32(controllerid)) + } + + public func virtualControllerButtonDown(button: VirtualControllerButtonType, controllerid: Int) { + appUIObjC.virtualControllerButtonDown(button, controllerId: Int32(controllerid)) + } + + public func virtualControllerButtonUp(button: VirtualControllerButtonType, controllerid: Int) { + appUIObjC.virtualControllerButtonUp(button, controllerId: Int32(controllerid)) + } + + public func settingsSaved() { + appUIObjC.settingsChanged() + } +} diff --git a/src/ios/AppUIGameInformation.h b/src/ios/AppUIGameInformation.h new file mode 100644 index 0000000000..1a602956c3 --- /dev/null +++ b/src/ios/AppUIGameInformation.h @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: 
GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface AppUIInformation : NSObject +@property (nonatomic, strong) NSString *developer; +@property (nonatomic, strong) NSData *iconData; +@property (nonatomic) BOOL isHomebrew; +@property (nonatomic) uint64_t programID; +@property (nonatomic, strong) NSString *title, *version; + +-(AppUIInformation *) initWithDeveloper:(NSString *)developer iconData:(NSData *)iconData isHomebrew:(BOOL)isHomebrew programID:(uint64_t)programID title:(NSString *)title version:(NSString *)version; +@end + +@interface AppUIGameInformation : NSObject ++(AppUIGameInformation *) sharedInstance NS_SWIFT_NAME(shared()); + +-(AppUIInformation *) informationForGame:(NSURL *)url NS_SWIFT_NAME(information(for:)); +@end + +NS_ASSUME_NONNULL_END diff --git a/src/ios/AppUIGameInformation.mm b/src/ios/AppUIGameInformation.mm new file mode 100644 index 0000000000..5c0e603348 --- /dev/null +++ b/src/ios/AppUIGameInformation.mm @@ -0,0 +1,436 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#import +#import "AppUIGameInformation.h" +#import "EmulationSession.h" + +#include "common/fs/fs.h" +#include "common/fs/path_util.h" +#include "core/core.h" +#include "core/file_sys/fs_filesystem.h" +#include "core/file_sys/patch_manager.h" +#include "core/loader/loader.h" +#include "core/loader/nro.h" +#include "frontend_common/config.h" + +struct GameMetadata { + std::string title; + u64 programId; + std::string developer; + std::string version; + std::vector icon; + bool isHomebrew; +}; + + +class SdlConfig final : public Config { +public: + explicit SdlConfig(std::optional config_path); + ~SdlConfig() override; + + void ReloadAllValues() override; + void SaveAllValues() override; 
+ +protected: + void ReadSdlValues(); + void ReadSdlPlayerValues(std::size_t player_index); + void ReadSdlControlValues(); + void ReadHidbusValues() override; + void ReadDebugControlValues() override; + void ReadPathValues() override {} + void ReadShortcutValues() override {} + void ReadUIValues() override {} + void ReadUIGamelistValues() override {} + void ReadUILayoutValues() override {} + void ReadMultiplayerValues() override {} + + void SaveSdlValues(); + void SaveSdlPlayerValues(std::size_t player_index); + void SaveSdlControlValues(); + void SaveHidbusValues() override; + void SaveDebugControlValues() override; + void SavePathValues() override {} + void SaveShortcutValues() override {} + void SaveUIValues() override {} + void SaveUIGamelistValues() override {} + void SaveUILayoutValues() override {} + void SaveMultiplayerValues() override {} + + std::vector& FindRelevantList(Settings::Category category) override; + +public: + static const std::array default_buttons; + static const std::array default_motions; + static const std::array, Settings::NativeAnalog::NumAnalogs> default_analogs; + static const std::array default_stick_mod; + static const std::array default_ringcon_analogs; +}; + + +#define SDL_MAIN_HANDLED +#include + +#include "common/logging.h" +#include "input_common/main.h" + +const std::array SdlConfig::default_buttons = { + SDL_SCANCODE_A, SDL_SCANCODE_S, SDL_SCANCODE_Z, SDL_SCANCODE_X, SDL_SCANCODE_T, + SDL_SCANCODE_G, SDL_SCANCODE_F, SDL_SCANCODE_H, SDL_SCANCODE_Q, SDL_SCANCODE_W, + SDL_SCANCODE_M, SDL_SCANCODE_N, SDL_SCANCODE_1, SDL_SCANCODE_2, SDL_SCANCODE_B, +}; + +const std::array SdlConfig::default_motions = { + SDL_SCANCODE_7, + SDL_SCANCODE_8, +}; + +const std::array, Settings::NativeAnalog::NumAnalogs> SdlConfig::default_analogs{ + { + { + SDL_SCANCODE_UP, + SDL_SCANCODE_DOWN, + SDL_SCANCODE_LEFT, + SDL_SCANCODE_RIGHT, + }, + { + SDL_SCANCODE_I, + SDL_SCANCODE_K, + SDL_SCANCODE_J, + SDL_SCANCODE_L, + }, + }}; + +const std::array 
SdlConfig::default_stick_mod = { + SDL_SCANCODE_D, + 0, +}; + +const std::array SdlConfig::default_ringcon_analogs{{ + 0, + 0, +}}; + +SdlConfig::SdlConfig(const std::optional config_path) { + Initialize(config_path); + ReadSdlValues(); + SaveSdlValues(); +} + +SdlConfig::~SdlConfig() { + if (global) { + SdlConfig::SaveAllValues(); + } +} + +void SdlConfig::ReloadAllValues() { + Reload(); + ReadSdlValues(); + SaveSdlValues(); +} + +void SdlConfig::SaveAllValues() { + SaveValues(); + SaveSdlValues(); +} + +void SdlConfig::ReadSdlValues() { + ReadSdlControlValues(); +} + +void SdlConfig::ReadSdlControlValues() { + BeginGroup(Settings::TranslateCategory(Settings::Category::Controls)); + + Settings::values.players.SetGlobal(!IsCustomConfig()); + for (std::size_t p = 0; p < Settings::values.players.GetValue().size(); ++p) { + ReadSdlPlayerValues(p); + } + if (IsCustomConfig()) { + EndGroup(); + return; + } + ReadDebugControlValues(); + ReadHidbusValues(); + + EndGroup(); +} + +void SdlConfig::ReadSdlPlayerValues(const std::size_t player_index) { + std::string player_prefix; + if (type != ConfigType::InputProfile) { + player_prefix.append("player_").append(ToString(player_index)).append("_"); + } + + auto& player = Settings::values.players.GetValue()[player_index]; + if (IsCustomConfig()) { + const auto profile_name = + ReadStringSetting(std::string(player_prefix).append("profile_name")); + if (profile_name.empty()) { + // Use the global input config + player = Settings::values.players.GetValue(true)[player_index]; + player.profile_name = ""; + return; + } + } + + for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { + const std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]); + auto& player_buttons = player.buttons[i]; + + player_buttons = ReadStringSetting( + std::string(player_prefix).append(Settings::NativeButton::mapping[i]), default_param); + if (player_buttons.empty()) { + player_buttons = default_param; + } + } + + for (int 
i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { + const std::string default_param = InputCommon::GenerateAnalogParamFromKeys( + default_analogs[i][0], default_analogs[i][1], default_analogs[i][2], + default_analogs[i][3], default_stick_mod[i], 0.5f); + auto& player_analogs = player.analogs[i]; + + player_analogs = ReadStringSetting( + std::string(player_prefix).append(Settings::NativeAnalog::mapping[i]), default_param); + if (player_analogs.empty()) { + player_analogs = default_param; + } + } + + for (int i = 0; i < Settings::NativeMotion::NumMotions; ++i) { + const std::string default_param = InputCommon::GenerateKeyboardParam(default_motions[i]); + auto& player_motions = player.motions[i]; + + player_motions = ReadStringSetting( + std::string(player_prefix).append(Settings::NativeMotion::mapping[i]), default_param); + if (player_motions.empty()) { + player_motions = default_param; + } + } +} + +void SdlConfig::ReadDebugControlValues() { + for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { + const std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]); + auto& debug_pad_buttons = Settings::values.debug_pad_buttons[i]; + debug_pad_buttons = ReadStringSetting( + std::string("debug_pad_").append(Settings::NativeButton::mapping[i]), default_param); + if (debug_pad_buttons.empty()) { + debug_pad_buttons = default_param; + } + } + for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { + const std::string default_param = InputCommon::GenerateAnalogParamFromKeys( + default_analogs[i][0], default_analogs[i][1], default_analogs[i][2], + default_analogs[i][3], default_stick_mod[i], 0.5f); + auto& debug_pad_analogs = Settings::values.debug_pad_analogs[i]; + debug_pad_analogs = ReadStringSetting( + std::string("debug_pad_").append(Settings::NativeAnalog::mapping[i]), default_param); + if (debug_pad_analogs.empty()) { + debug_pad_analogs = default_param; + } + } +} + +void SdlConfig::ReadHidbusValues() { + const std::string 
default_param = InputCommon::GenerateAnalogParamFromKeys( + 0, 0, default_ringcon_analogs[0], default_ringcon_analogs[1], 0, 0.05f); + auto& ringcon_analogs = Settings::values.ringcon_analogs; + + ringcon_analogs = ReadStringSetting(std::string("ring_controller"), default_param); + if (ringcon_analogs.empty()) { + ringcon_analogs = default_param; + } +} + +void SdlConfig::SaveSdlValues() { + LOG_DEBUG(Config, "Saving SDL configuration values"); + SaveSdlControlValues(); + + WriteToIni(); +} + +void SdlConfig::SaveSdlControlValues() { + BeginGroup(Settings::TranslateCategory(Settings::Category::Controls)); + + Settings::values.players.SetGlobal(!IsCustomConfig()); + for (std::size_t p = 0; p < Settings::values.players.GetValue().size(); ++p) { + SaveSdlPlayerValues(p); + } + if (IsCustomConfig()) { + EndGroup(); + return; + } + SaveDebugControlValues(); + SaveHidbusValues(); + + EndGroup(); +} + +void SdlConfig::SaveSdlPlayerValues(const std::size_t player_index) { + std::string player_prefix; + if (type != ConfigType::InputProfile) { + player_prefix = std::string("player_").append(ToString(player_index)).append("_"); + } + + const auto& player = Settings::values.players.GetValue()[player_index]; + if (IsCustomConfig() && player.profile_name.empty()) { + // No custom profile selected + return; + } + + for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { + const std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]); + WriteStringSetting(std::string(player_prefix).append(Settings::NativeButton::mapping[i]), + player.buttons[i], std::make_optional(default_param)); + } + for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { + const std::string default_param = InputCommon::GenerateAnalogParamFromKeys( + default_analogs[i][0], default_analogs[i][1], default_analogs[i][2], + default_analogs[i][3], default_stick_mod[i], 0.5f); + WriteStringSetting(std::string(player_prefix).append(Settings::NativeAnalog::mapping[i]), + 
player.analogs[i], std::make_optional(default_param)); + } + for (int i = 0; i < Settings::NativeMotion::NumMotions; ++i) { + const std::string default_param = InputCommon::GenerateKeyboardParam(default_motions[i]); + WriteStringSetting(std::string(player_prefix).append(Settings::NativeMotion::mapping[i]), + player.motions[i], std::make_optional(default_param)); + } +} + +void SdlConfig::SaveDebugControlValues() { + for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) { + const std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]); + WriteStringSetting(std::string("debug_pad_").append(Settings::NativeButton::mapping[i]), + Settings::values.debug_pad_buttons[i], + std::make_optional(default_param)); + } + for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) { + const std::string default_param = InputCommon::GenerateAnalogParamFromKeys( + default_analogs[i][0], default_analogs[i][1], default_analogs[i][2], + default_analogs[i][3], default_stick_mod[i], 0.5f); + WriteStringSetting(std::string("debug_pad_").append(Settings::NativeAnalog::mapping[i]), + Settings::values.debug_pad_analogs[i], + std::make_optional(default_param)); + } +} + +void SdlConfig::SaveHidbusValues() { + const std::string default_param = InputCommon::GenerateAnalogParamFromKeys( + 0, 0, default_ringcon_analogs[0], default_ringcon_analogs[1], 0, 0.05f); + WriteStringSetting(std::string("ring_controller"), Settings::values.ringcon_analogs, + std::make_optional(default_param)); +} + +std::vector& SdlConfig::FindRelevantList(Settings::Category category) { + return Settings::values.linkage.by_category[category]; +} + + + + + + + +std::unordered_map m_game_metadata_cache; + +GameMetadata CacheGameMetadata(const std::string& path) { + const auto file = + Core::GetGameFileFromPath(EmulationSession::GetInstance().System().GetFilesystem(), path); + auto loader = Loader::GetLoader(EmulationSession::GetInstance().System(), file, 0, 0); + + GameMetadata entry; + 
loader->ReadTitle(entry.title); + loader->ReadProgramId(entry.programId); + loader->ReadIcon(entry.icon); + + const FileSys::PatchManager pm{ + entry.programId, EmulationSession::GetInstance().System().GetFileSystemController(), + EmulationSession::GetInstance().System().GetContentProvider()}; + const auto control = pm.GetControlMetadata(); + + if (control.first != nullptr) { + entry.developer = control.first->GetDeveloperName(); + entry.version = control.first->GetVersionString(); + } else { + FileSys::NACP nacp; + if (loader->ReadControlData(nacp) == Loader::ResultStatus::Success) { + entry.developer = nacp.GetDeveloperName(); + } else { + entry.developer = ""; + } + + entry.version = "1.0.0"; + } + + if (loader->GetFileType() == Loader::FileType::NRO) { + auto loader_nro = reinterpret_cast(loader.get()); + entry.isHomebrew = loader_nro->IsHomebrew(); + } else { + entry.isHomebrew = false; + } + + m_game_metadata_cache[path] = entry; + + return entry; +} + +GameMetadata GameMetadata(const std::string& path, bool reload = false) { + if (!EmulationSession::GetInstance().IsInitialized()) { + NSURL *dir_url = [[[NSFileManager defaultManager] URLsForDirectory:NSDocumentDirectory inDomains:NSUserDomainMask] firstObject]; + const char *directory_cstr = [[dir_url path] UTF8String]; + Common::FS::SetAppDirectory(directory_cstr); + + EmulationSession::GetInstance().System().Initialize(); + EmulationSession::GetInstance().InitializeSystem(false); + } + + if (reload) { + return CacheGameMetadata(path); + } + + if (auto search = m_game_metadata_cache.find(path); search != m_game_metadata_cache.end()) { + return search->second; + } + + return CacheGameMetadata(path); +} + + +@implementation AppUIInformation +-(AppUIInformation *) initWithDeveloper:(NSString *)developer iconData:(NSData *)iconData isHomebrew:(BOOL)isHomebrew programID:(uint64_t)programID + title:(NSString *)title version:(NSString *)version { + if (self = [super init]) { + self.developer = developer; + 
self.iconData = iconData; + self.isHomebrew = isHomebrew; + self.programID = programID; + self.title = title; + self.version = version; + } return self; +} +@end + +@implementation AppUIGameInformation ++(AppUIGameInformation *) sharedInstance { + static AppUIGameInformation *sharedInstance = NULL; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + sharedInstance = [[self alloc] init]; + }); + return sharedInstance; +} + + +-(AppUIInformation *) informationForGame:(NSURL *)url { + auto gameMetadata = GameMetadata([url.path UTF8String]); + + return [[AppUIInformation alloc] initWithDeveloper:[NSString stringWithCString:gameMetadata.developer.c_str() encoding:NSUTF8StringEncoding] + iconData:[NSData dataWithBytes:gameMetadata.icon.data() length:gameMetadata.icon.size()] + isHomebrew:gameMetadata.isHomebrew programID:gameMetadata.programId + title:[NSString stringWithCString:gameMetadata.title.c_str() encoding:NSUTF8StringEncoding] + version:[NSString stringWithCString:gameMetadata.version.c_str() encoding:NSUTF8StringEncoding]]; +} +@end diff --git a/src/ios/AppUIObjC.h b/src/ios/AppUIObjC.h new file mode 100644 index 0000000000..942cd111a7 --- /dev/null +++ b/src/ios/AppUIObjC.h @@ -0,0 +1,93 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#import +#import +#import + +#import "AppUIGameInformation.h" + +NS_ASSUME_NONNULL_BEGIN + +typedef NS_ENUM(NSUInteger, VirtualControllerAnalogType) { + VirtualControllerAnalogTypeLeft = 0, + VirtualControllerAnalogTypeRight = 1 +}; + +typedef NS_ENUM(NSUInteger, VirtualControllerButtonType) { + VirtualControllerButtonTypeA = 0, + VirtualControllerButtonTypeB = 1, + VirtualControllerButtonTypeX = 2, + VirtualControllerButtonTypeY = 3, + VirtualControllerButtonTypeL = 4, + VirtualControllerButtonTypeR = 5, + VirtualControllerButtonTypeTriggerL 
= 6, + VirtualControllerButtonTypeTriggerR = 7, + VirtualControllerButtonTypeTriggerZL = 8, + VirtualControllerButtonTypeTriggerZR = 9, + VirtualControllerButtonTypePlus = 10, + VirtualControllerButtonTypeMinus = 11, + VirtualControllerButtonTypeDirectionalPadLeft = 12, + VirtualControllerButtonTypeDirectionalPadUp = 13, + VirtualControllerButtonTypeDirectionalPadRight = 14, + VirtualControllerButtonTypeDirectionalPadDown = 15, + VirtualControllerButtonTypeSL = 16, + VirtualControllerButtonTypeSR = 17, + VirtualControllerButtonTypeHome = 18, + VirtualControllerButtonTypeCapture = 19 +}; + +@interface AppUIObjC : NSObject { + CAMetalLayer *_layer; + CGSize _size; +} + +@property (nonatomic, strong) AppUIGameInformation *gameInformation; + ++(AppUIObjC *) sharedInstance NS_SWIFT_NAME(shared()); +-(void) configureLayer:(CAMetalLayer *)layer withSize:(CGSize)size NS_SWIFT_NAME(configure(layer:with:)); +-(void) bootOS; +-(void) pause; +-(void) play; +-(BOOL) ispaused; +-(BOOL) canGetFullPath; +-(void) quit; +-(void) insertGame:(NSURL *)url NS_SWIFT_NAME(insert(game:)); +-(void) insertGames:(NSArray *)games NS_SWIFT_NAME(insert(games:)); +-(void) step; +-(BOOL) hasfirstfame; + +-(void) touchBeganAtPoint:(CGPoint)point index:(NSUInteger)index NS_SWIFT_NAME(touchBegan(at:for:)); +-(void) touchEndedForIndex:(NSUInteger)index; +-(void) touchMovedAtPoint:(CGPoint)point index:(NSUInteger)index NS_SWIFT_NAME(touchMoved(at:for:)); + +-(void) thumbstickMoved:(VirtualControllerAnalogType)analog + x:(CGFloat)x + y:(CGFloat)y + controllerId:(int)controllerId; + +-(void) virtualControllerGyro:(int)controllerId + deltaTimestamp:(int)delta_timestamp + gyroX:(float)gyro_x + gyroY:(float)gyro_y + gyroZ:(float)gyro_z + accelX:(float)accel_x + accelY:(float)accel_y + accelZ:(float)accel_z; + +-(void) virtualControllerButtonDown:(VirtualControllerButtonType)button + controllerId:(int)controllerId; + +-(void) virtualControllerButtonUp:(VirtualControllerButtonType)button + 
controllerId:(int)controllerId; + + +-(void) orientationChanged:(UIInterfaceOrientation)orientation with:(CAMetalLayer *)layer size:(CGSize)size NS_SWIFT_NAME(orientationChanged(orientation:with:size:)); + +-(void) settingsChanged; + +@end + +NS_ASSUME_NONNULL_END diff --git a/src/ios/AppUIObjC.mm b/src/ios/AppUIObjC.mm new file mode 100644 index 0000000000..32bffd43ca --- /dev/null +++ b/src/ios/AppUIObjC.mm @@ -0,0 +1,251 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#import "AppUIObjC.h" + +#import "Config.h" +#import "EmulationSession.h" + +#include "common/fs/fs.h" +#include "common/fs/path_util.h" +#include "common/settings.h" +#include "common/fs/fs.h" +#include "core/file_sys/patch_manager.h" +#include "core/file_sys/savedata_factory.h" +#include "core/loader/nro.h" +#include "frontend_common/content_manager.h" +#include "common/settings_enums.h" +#include "network/announce_multiplayer_session.h" +#include "common/announce_multiplayer_room.h" +#include "network/network.h" + +#include "common/detached_tasks.h" +#include "common/dynamic_library.h" +#include "common/fs/path_util.h" +#include "common/logging.h" +#include "common/scope_exit.h" +#include "common/settings.h" +#include "common/string_util.h" +#include "core/core.h" +#include "core/cpu_manager.h" +#include "core/crypto/key_manager.h" +#include "core/file_sys/card_image.h" +#include "core/file_sys/content_archive.h" +#include "core/file_sys/fs_filesystem.h" +#include "core/file_sys/submission_package.h" +#include "core/file_sys/vfs/vfs.h" +#include "core/file_sys/vfs/vfs_real.h" +#include "core/frontend/applets/cabinet.h" +#include "core/frontend/applets/controller.h" +#include "core/frontend/applets/error.h" +#include "core/frontend/applets/general.h" +#include "core/frontend/applets/mii_edit.h" +#include 
"core/frontend/applets/profile_select.h" +#include "core/frontend/applets/software_keyboard.h" +#include "core/frontend/applets/web_browser.h" +#include "core/hle/service/am/applet_manager.h" +#include "core/hle/service/am/frontend/applets.h" +#include "core/hle/service/filesystem/filesystem.h" +#include "core/loader/loader.h" +#include "hid_core/hid_core.h" +#include "hid_core/hid_types.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/vulkan_common/vulkan_instance.h" +#include "video_core/vulkan_common/vulkan_surface.h" + + +#import + +@implementation AppUIObjC +-(AppUIObjC *) init { + if (self = [super init]) { + _gameInformation = [AppUIGameInformation sharedInstance]; + + NSURL *dir_url = [[[NSFileManager defaultManager] URLsForDirectory:NSDocumentDirectory inDomains:NSUserDomainMask] firstObject]; + const char *directory_cstr = [[dir_url path] UTF8String]; + + Common::FS::SetAppDirectory(directory_cstr); + // Config{"config", Config::ConfigType::GlobalConfig}; + + EmulationSession::GetInstance().System().Initialize(); + EmulationSession::GetInstance().InitializeSystem(false); + EmulationSession::GetInstance().InitializeGpuDriver(); + + + Settings::values.dump_shaders.SetValue(true); + Settings::values.use_asynchronous_shaders.SetValue(true); + // Settings::values.astc_recompression.SetValue(Settings::AstcRecompression::Bc3); + // Settings::values.resolution_setup.SetValue(Settings::ResolutionSetup::Res1X); + // Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::Bilinear); + } return self; +} + + ++(AppUIObjC *) sharedInstance { + static AppUIObjC *sharedInstance = NULL; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + sharedInstance = [[self alloc] init]; + }); + return sharedInstance; +} + +- (BOOL)ispaused { + return EmulationSession::GetInstance().IsPaused(); +} + +-(void) pause { + EmulationSession::GetInstance().System().Pause(); + 
EmulationSession::GetInstance().HaltEmulation(); + EmulationSession::GetInstance().PauseEmulation(); +} + +-(void) play { + + EmulationSession::GetInstance().System().Run(); + EmulationSession::GetInstance().RunEmulation(); + EmulationSession::GetInstance().UnPauseEmulation(); +} + +-(BOOL)hasfirstfame { + @try { + auto* window = &EmulationSession::GetInstance().Window(); + if (window && window->HasFirstFrame()) { + return YES; + } + } + @catch (NSException *exception) { + NSLog(@"Exception occurred: %@", exception); + // Handle the exception, maybe return a default value + return NO; + } + return NO; +} + +- (BOOL)canGetFullPath { + @try { + Core::System& system = EmulationSession::GetInstance().System(); + auto bis_system = system.GetFileSystemController().GetSystemNANDContents(); + + if (bis_system == nullptr) { + return NO; + } + + constexpr u64 QLaunchId = static_cast(Service::AM::AppletProgramId::QLaunch); + auto qlaunch_applet_nca = bis_system->GetEntry(QLaunchId, FileSys::ContentRecordType::Program); + + if (qlaunch_applet_nca == nullptr) { + return NO; + } + + const auto filename = qlaunch_applet_nca->GetFullPath(); + + // If GetFullPath() is successful + return YES; + } @catch (NSException *exception) { + // Handle the exception if needed + return NO; + } +} + +-(void) quit { + EmulationSession::GetInstance().ShutdownEmulation(); +} + +-(void) configureLayer:(CAMetalLayer *)layer withSize:(CGSize)size { + _layer = layer; + _size = size; + EmulationSession::GetInstance().SetNativeWindow(layer, size); +} + +-(void) bootOS { + EmulationSession::GetInstance().BootOS(); +} + +-(void) insertGame:(NSURL *)url { + EmulationSession::GetInstance().InitializeEmulation([url.path UTF8String], [_gameInformation informationForGame:url].programID, true); +} + +-(void) insertGames:(NSArray *)games { + for (NSURL *url in games) { + EmulationSession::GetInstance().ConfigureFilesystemProvider([url.path UTF8String]); + } +} + +-(void) step { + 
void(EmulationSession::GetInstance().System().Run()); +} + +-(void) touchBeganAtPoint:(CGPoint)point index:(NSUInteger)index { + float h_ratio, w_ratio; + h_ratio = EmulationSession::GetInstance().Window().GetFramebufferLayout().height / (_size.height * [[UIScreen mainScreen] nativeScale]); + w_ratio = EmulationSession::GetInstance().Window().GetFramebufferLayout().width / (_size.width * [[UIScreen mainScreen] nativeScale]); + + EmulationSession::GetInstance().Window().OnTouchPressed([[NSNumber numberWithUnsignedInteger:index] intValue], + (point.x) * [[UIScreen mainScreen] nativeScale] * w_ratio, + ((point.y) * [[UIScreen mainScreen] nativeScale] * h_ratio)); +} + +-(void) touchEndedForIndex:(NSUInteger)index { + EmulationSession::GetInstance().Window().OnTouchReleased([[NSNumber numberWithUnsignedInteger:index] intValue]); +} + +-(void) touchMovedAtPoint:(CGPoint)point index:(NSUInteger)index { + float h_ratio, w_ratio; + h_ratio = EmulationSession::GetInstance().Window().GetFramebufferLayout().height / (_size.height * [[UIScreen mainScreen] nativeScale]); + w_ratio = EmulationSession::GetInstance().Window().GetFramebufferLayout().width / (_size.width * [[UIScreen mainScreen] nativeScale]); + + EmulationSession::GetInstance().Window().OnTouchMoved([[NSNumber numberWithUnsignedInteger:index] intValue], + (point.x) * [[UIScreen mainScreen] nativeScale] * w_ratio, + ((point.y) * [[UIScreen mainScreen] nativeScale] * h_ratio)); +} + +-(void) thumbstickMoved:(VirtualControllerAnalogType)analog + x:(CGFloat)x + y:(CGFloat)y + controllerId:(int)controllerId { + EmulationSession::GetInstance().OnGamepadConnectEvent(controllerId); + EmulationSession::GetInstance().Window().OnGamepadJoystickEvent(controllerId, [[NSNumber numberWithUnsignedInteger:analog] intValue], CGFloat(x), CGFloat(y)); +} + +-(void) virtualControllerButtonDown:(VirtualControllerButtonType)button + controllerId:(int)controllerId { + EmulationSession::GetInstance().OnGamepadConnectEvent(controllerId); + 
EmulationSession::GetInstance().Window().OnGamepadButtonEvent(controllerId, [[NSNumber numberWithUnsignedInteger:button] intValue], true); +} + +-(void) virtualControllerGyro:(int)controllerId + deltaTimestamp:(int)delta_timestamp + gyroX:(float)gyro_x + gyroY:(float)gyro_y + gyroZ:(float)gyro_z + accelX:(float)accel_x + accelY:(float)accel_y + accelZ:(float)accel_z +{ + EmulationSession::GetInstance().OnGamepadConnectEvent(controllerId); + EmulationSession::GetInstance().Window().OnGamepadMotionEvent(controllerId, delta_timestamp, gyro_x, gyro_y, gyro_z, accel_x, accel_y, accel_z); +} + + +-(void) virtualControllerButtonUp:(VirtualControllerButtonType)button + controllerId:(int)controllerId { + EmulationSession::GetInstance().OnGamepadConnectEvent(controllerId); + EmulationSession::GetInstance().Window().OnGamepadButtonEvent(controllerId, [[NSNumber numberWithUnsignedInteger:button] intValue], false); +} + + +-(void) orientationChanged:(UIInterfaceOrientation)orientation with:(CAMetalLayer *)layer size:(CGSize)size { + _layer = layer; + _size = size; + EmulationSession::GetInstance().Window().OnSurfaceChanged(layer, size); +} + +-(void) settingsChanged { + // +} + +@end diff --git a/src/ios/BootOSView.swift b/src/ios/BootOSView.swift new file mode 100644 index 0000000000..5af5d55145 --- /dev/null +++ b/src/ios/BootOSView.swift @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import AppUI + +struct BootOSView: View { + @Binding var core: Core + @Binding var currentnavigarion: Int + @State var appui = AppUI.shared + @AppStorage("cangetfullpath") var canGetFullPath: Bool = false + var body: some View { + if (appui.canGetFullPath() || canGetFullPath) { + EmulationView(game: nil) + } else { + VStack { + Text("Unable Launch Switch OS") + .font(.largeTitle) + .padding()
+ Text("You do not have the Switch Home Menu Files Needed to launch the Home Menu") + } + } + } +} diff --git a/src/ios/CMakeLists.txt b/src/ios/CMakeLists.txt new file mode 100644 index 0000000000..5d979c2ce5 --- /dev/null +++ b/src/ios/CMakeLists.txt @@ -0,0 +1,98 @@ +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + +enable_language(Swift OBJCXX) +add_executable(eden-ios + AppUI-Bridging-Header.h + AppUI.swift + AppUIGameInformation.h + AppUIGameInformation.mm + AppUIObjC.h + AppUIObjC.mm + Config.h + Config.mm + EmulationSession.h + EmulationSession.mm + EmulationWindow.h + EmulationWindow.mm + VMA.cpp + + EnableJIT.swift + EmulationGame.swift + JoystickView.swift + CoreSettingsView.swift + ContentView.swift + EmulationHandler.swift + DetectServer.swift + NavView.swift + PomeloApp.swift + SettingsView.swift + FileManager.swift + EmulationView.swift + LibraryView.swift + GameButtonListView.swift + KeyboardHostingController.swift + MetalView.swift + BootOSView.swift + ControllerView.swift + AppUI.swift + InfoView.swift + FolderMonitor.swift + AdvancedSettingsView.swift + GameButtonView.swift + AppIconProvider.swift + Haptics.swift + EmulationScreenView.swift + GameListView.swift +) + +set(MACOSX_BUNDLE_GUI_IDENTIFIER "dev.eden-emu.eden") +set(MACOSX_BUNDLE_BUNDLE_NAME "Eden") +set(MACOSX_BUNDLE_INFO_STRING "Eden: A high-performance Nintendo Switch emulator") + +# TODO(crueter): Copyright, and versioning + +# Keep bundle identifier as-is, for compatibility sake +set_target_properties(eden-ios PROPERTIES + XCODE_ATTRIBUTE_SWIFT_OBJC_BRIDGING_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/AppUI-Bridging-Header.h" + XCODE_ATTRIBUTE_SWIFT_OBJC_INTERFACE_HEADER_NAME "eden-ios-Swift.h" + XCODE_ATTRIBUTE_DERIVED_FILE_DIR "${CMAKE_CURRENT_BINARY_DIR}") + +target_link_libraries(eden-ios PRIVATE common core input_common frontend_common video_core sirit::sirit) +target_link_libraries(eden-ios PRIVATE ${PLATFORM_LIBRARIES} 
Threads::Threads) +target_link_libraries(eden-ios PRIVATE SDL2::SDL2 glad stb::headers) +create_target_directory_groups(eden-ios) + +# FIXME(crueter): This should /all/ be in a module of some kind! + +# Xcode will automatically generate the Assets.car and icns file for us. +set(_dist "${CMAKE_SOURCE_DIR}/dist") +if (CMAKE_GENERATOR MATCHES "Xcode") + set(_icons "${_dist}/eden.icon") + + set_target_properties(eden-ios PROPERTIES + XCODE_ATTRIBUTE_ASSETCATALOG_COMPILER_APPICON_NAME eden + MACOSX_BUNDLE_ICON_FILE eden + # Also force xcode to manage signing for us. + XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED ON + XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED ON + XCODE_ATTRIBUTE_CODE_SIGN_STYLE Automatic) +# Otherwise, we'll use our own. +else() + set(_icons "${_dist}/eden.icns" "${_dist}/Assets.car") +endif() + +set_source_files_properties(${_icons} PROPERTIES + MACOSX_PACKAGE_LOCATION Resources) +target_sources(eden-ios PRIVATE ${_icons}) + +set_target_properties(eden-ios PROPERTIES MACOSX_BUNDLE TRUE) + +set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib") +find_library(MOLTENVK_LIBRARY MoltenVK REQUIRED) +message(STATUS "Using MoltenVK at ${MOLTENVK_LIBRARY}.") + +set_source_files_properties(${MOLTENVK_LIBRARY} PROPERTIES + MACOSX_PACKAGE_LOCATION Frameworks + XCODE_FILE_ATTRIBUTES "CodeSignOnCopy") +target_sources(eden-ios PRIVATE ${MOLTENVK_LIBRARY}) diff --git a/src/ios/Config.h b/src/ios/Config.h new file mode 100644 index 0000000000..4adc23fe44 --- /dev/null +++ b/src/ios/Config.h @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include "common/common_types.h" +#include "common/settings_setting.h" +#include "common/settings_enums.h" + +namespace IOSSettings { + struct Values { + Settings::Linkage linkage; + Settings::Setting touchscreen{linkage, true, "touchscreen", Settings::Category::Overlay}; + }; + + extern Values values; + +} // namespace IOSSettings diff --git 
a/src/ios/Config.mm b/src/ios/Config.mm new file mode 100644 index 0000000000..5b063d79f9 --- /dev/null +++ b/src/ios/Config.mm @@ -0,0 +1,11 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + + +#include "Config.h" + +namespace IOSSettings { + +Values values; + +} // namespace IOSSettings diff --git a/src/ios/ContentView.swift b/src/ios/ContentView.swift new file mode 100644 index 0000000000..07d41f89e6 --- /dev/null +++ b/src/ios/ContentView.swift @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import AppUI + +struct ContentView: View { +@State var core = Core(games: [], root: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]) + var body: some View { + HomeView(core: core).onAppear() { + } + } +} diff --git a/src/ios/ControllerView.swift b/src/ios/ControllerView.swift new file mode 100644 index 0000000000..a19cdffd28 --- /dev/null +++ b/src/ios/ControllerView.swift @@ -0,0 +1,420 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import GameController +import AppUI +import SwiftUIJoystick + +struct ControllerView: View { + let appui = AppUI.shared + @State var isPressed = false + @State var controllerconnected = false + @State private var x: CGFloat = 0.0 + @State private var y: CGFloat = 0.0 + @Environment(\.presentationMode) var presentationMode + + var body: some View { + GeometryReader { geometry in + ZStack { + if !controllerconnected { + OnScreenController(geometry: geometry) // i did this to clean it up as it was quite long lmfao + } + } + } + .onAppear { + print("checking for 
controller:") + controllerconnected = false + DispatchQueue.main.async { + setupControllers() // i dont know what half of this shit does + } + } + } + + // Add a dictionary to track controller IDs + @State var controllerIDs: [GCController: Int] = [:] + + private func setupControllers() { + NotificationCenter.default.addObserver(forName: .GCControllerDidConnect, object: nil, queue: .main) { notification in + if let controller = notification.object as? GCController { + print("wow controller onstart") // yippeeee + self.setupController(controller) + self.controllerconnected = true + } else { + print("not GCController :((((((") // wahhhhhhh + } + } + + + NotificationCenter.default.addObserver(forName: .GCControllerDidDisconnect, object: nil, queue: .main) { notification in + if let controller = notification.object as? GCController { + print("wow controller gone") + self.controllerIDs.removeValue(forKey: controller) // Remove the controller ID + if self.controllerIDs.isEmpty { // checked after removal so the last disconnect is detected + controllerconnected = false + } + } + } + + GCController.controllers().forEach { controller in + print("wow controller") + self.controllerconnected = true + self.setupController(controller) + } + } + + private func setupController(_ controller: GCController) { + // Assign a unique ID to the controller, max 5 controllers + if controllerIDs.count < 6, controllerIDs[controller] == nil { + controllerIDs[controller] = controllerIDs.count + } + + guard let controllerId = controllerIDs[controller] else { return } + + if let extendedGamepad = controller.extendedGamepad { + + // Handle extended gamepad + extendedGamepad.dpad.up.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.directionalPadUp, controllerId: controllerId) : self.touchUpInside(.directionalPadUp, controllerId: controllerId) + } + + extendedGamepad.dpad.down.pressedChangedHandler = { button, value, pressed in + pressed ? 
self.touchDown(.directionalPadDown, controllerId: controllerId) : self.touchUpInside(.directionalPadDown, controllerId: controllerId) + } + extendedGamepad.dpad.left.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.directionalPadLeft, controllerId: controllerId) : self.touchUpInside(.directionalPadLeft, controllerId: controllerId) + } + extendedGamepad.dpad.right.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.directionalPadRight, controllerId: controllerId) : self.touchUpInside(.directionalPadRight, controllerId: controllerId) + } + extendedGamepad.buttonOptions?.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.minus, controllerId: controllerId) : self.touchUpInside(.minus, controllerId: controllerId) + } + extendedGamepad.buttonMenu.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.plus, controllerId: controllerId) : self.touchUpInside(.plus, controllerId: controllerId) + } + extendedGamepad.buttonA.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.A, controllerId: controllerId) : self.touchUpInside(.A, controllerId: controllerId) + } + extendedGamepad.buttonB.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.B, controllerId: controllerId) : self.touchUpInside(.B, controllerId: controllerId) + } + extendedGamepad.buttonX.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.X, controllerId: controllerId) : self.touchUpInside(.X, controllerId: controllerId) + } + extendedGamepad.buttonY.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.Y, controllerId: controllerId) : self.touchUpInside(.Y, controllerId: controllerId) + } + extendedGamepad.leftShoulder.pressedChangedHandler = { button, value, pressed in + pressed ? 
self.touchDown(.triggerL, controllerId: controllerId) : self.touchUpInside(.triggerL, controllerId: controllerId) // release mirrors the press mapping + } + extendedGamepad.leftTrigger.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.triggerZL, controllerId: controllerId) : self.touchUpInside(.triggerZL, controllerId: controllerId) + } + extendedGamepad.rightShoulder.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.triggerR, controllerId: controllerId) : self.touchUpInside(.triggerR, controllerId: controllerId) + } + extendedGamepad.leftThumbstickButton?.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.L, controllerId: controllerId) : self.touchUpInside(.L, controllerId: controllerId) // release the same button that was pressed + } + extendedGamepad.rightThumbstickButton?.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.R, controllerId: controllerId) : self.touchUpInside(.R, controllerId: controllerId) // release the same button that was pressed + } + extendedGamepad.rightTrigger.pressedChangedHandler = { button, value, pressed in + pressed ? 
self.touchDown(.triggerZR, controllerId: controllerId) : self.touchUpInside(.triggerZR, controllerId: controllerId) + } + extendedGamepad.buttonHome?.pressedChangedHandler = { button, value, pressed in + if pressed { + appui.exit() + presentationMode.wrappedValue.dismiss() + } + } + extendedGamepad.leftThumbstick.valueChangedHandler = { dpad, x, y in + self.appui.thumbstickMoved(analog: .left, x: x, y: y, controllerid: controllerId) + } + + extendedGamepad.rightThumbstick.valueChangedHandler = { dpad, x, y in + self.appui.thumbstickMoved(analog: .right, x: x, y: y, controllerid: controllerId) + } + + if let motion = controller.motion { + var lastTimestamp = Date().timeIntervalSince1970 // Initialize timestamp when motion starts + + motion.valueChangedHandler = { motion in + // Get current time + let currentTimestamp = Date().timeIntervalSince1970 + let deltaTimestamp = Int32((currentTimestamp - lastTimestamp) * 1000) // Difference in milliseconds + + // Update last timestamp + lastTimestamp = currentTimestamp + + // Get gyroscope data + let gyroX = motion.rotationRate.x + let gyroY = motion.rotationRate.y + let gyroZ = motion.rotationRate.z + + // Get accelerometer data + let accelX = motion.gravity.x + motion.userAcceleration.x + let accelY = motion.gravity.y + motion.userAcceleration.y + let accelZ = motion.gravity.z + motion.userAcceleration.z + + print("\(gyroX), \(gyroY), \(gyroZ), \(accelX), \(accelY), \(accelZ)") + + // Forward the sample with the elapsed time since the previous one (not the absolute timestamp) + appui.gyroMoved(x: Float(gyroX), y: Float(gyroY), z: Float(gyroZ), accelX: Float(accelX), accelY: Float(accelY), accelZ: Float(accelZ), controllerId: Int32(controllerId), deltaTimestamp: deltaTimestamp) + } + } + } else if let microGamepad = controller.microGamepad { + // Handle micro gamepad + microGamepad.dpad.up.pressedChangedHandler = { button, value, pressed in + pressed ? 
self.touchDown(.directionalPadUp, controllerId: controllerId) : self.touchUpInside(.directionalPadUp, controllerId: controllerId) + } + microGamepad.dpad.down.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.directionalPadDown, controllerId: controllerId) : self.touchUpInside(.directionalPadDown, controllerId: controllerId) + } + microGamepad.dpad.left.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.directionalPadLeft, controllerId: controllerId) : self.touchUpInside(.directionalPadLeft, controllerId: controllerId) + } + microGamepad.dpad.right.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.directionalPadRight, controllerId: controllerId) : self.touchUpInside(.directionalPadRight, controllerId: controllerId) + } + microGamepad.buttonA.pressedChangedHandler = { button, value, pressed in + pressed ? self.touchDown(.A, controllerId: controllerId) : self.touchUpInside(.A, controllerId: controllerId) + } + microGamepad.buttonX.pressedChangedHandler = { button, value, pressed in + pressed ? 
self.touchDown(.X, controllerId: controllerId) : self.touchUpInside(.X, controllerId: controllerId) + } + } + } + + private func touchDown(_ button: VirtualControllerButtonType, controllerId: Int) { + appui.virtualControllerButtonDown(button: button, controllerid: controllerId) } + + private func touchUpInside(_ button: VirtualControllerButtonType, controllerId: Int) { + appui.virtualControllerButtonUp(button: button, controllerid: controllerId) + } +} + +struct OnScreenController: View { + @State var geometry: GeometryProxy + var body: some View { + if geometry.size.height > geometry.size.width && UIDevice.current.userInterfaceIdiom != .pad { + // portrait + VStack { + Spacer() + VStack { + HStack { + VStack { + ShoulderButtonsViewLeft() + ZStack { + Joystick() + DPadView() + } + } + .padding() + VStack { + ShoulderButtonsViewRight() + ZStack { + Joystick(iscool: true) // hope this works + ABXYView() + } + } + .padding() + } + HStack { + ButtonView(button: .plus).padding(.horizontal, 40) + ButtonView(button: .minus).padding(.horizontal, 40) + } + } + .padding(.bottom, geometry.size.height / 3.2) // very broken + } + } else { + // could be landscape + VStack { + HStack { + Spacer() + ButtonView(button: .home) + .padding(.horizontal) + } + Spacer() + VStack { + HStack { + + // gotta fuckin add + and - now + VStack { + ShoulderButtonsViewLeft() + ZStack { + Joystick() + DPadView() + } + } + HStack { + Spacer() + VStack { + Spacer() + ButtonView(button: .plus) // Adding the + button + } + VStack { + Spacer() + ButtonView(button: .minus) // Adding the - button + } + Spacer() + } + VStack { + ShoulderButtonsViewRight() + ZStack { + Joystick(iscool: true) // hope this work s + ABXYView() + } + } + } + + } + .padding(.bottom, geometry.size.height / 11) // also extremally broken ( + } + } + } +} + +struct ShoulderButtonsViewLeft: View { + var body: some View { + HStack { + ButtonView(button: .triggerZL) + .padding(.horizontal) + ButtonView(button: .triggerL) + 
.padding(.horizontal) + } + .frame(width: 160, height: 20) + } +} + +struct ShoulderButtonsViewRight: View { + var body: some View { + HStack { + ButtonView(button: .triggerR) + .padding(.horizontal) + ButtonView(button: .triggerZR) + .padding(.horizontal) + } + .frame(width: 160, height: 20) + + } +} + +struct DPadView: View { + var body: some View { + VStack { + ButtonView(button: .directionalPadUp) + HStack { + ButtonView(button: .directionalPadLeft) + Spacer(minLength: 20) + ButtonView(button: .directionalPadRight) + } + ButtonView(button: .directionalPadDown) + .padding(.horizontal) + } + .frame(width: 145, height: 145) + } +} + +struct ABXYView: View { + var body: some View { + VStack { + ButtonView(button: .X) + HStack { + ButtonView(button: .Y) + Spacer(minLength: 20) + ButtonView(button: .A) + } + ButtonView(button: .B) + .padding(.horizontal) + } + .frame(width: 145, height: 145) + } +} + +struct ButtonView: View { + var button: VirtualControllerButtonType + @StateObject private var viewModel: EmulationViewModel = EmulationViewModel(game: nil) + let appui = AppUI.shared + @State var mtkView: MTKView? + @State var width: CGFloat = 45 + @State var height: CGFloat = 45 + @State var isPressed = false + var id: Int { + if onscreenjoy { + return 8 + } + return 0 + } + @AppStorage("onscreenhandheld") var onscreenjoy: Bool = false + @Environment(\.colorScheme) var colorScheme + @Environment(\.presentationMode) var presentationMode + + var body: some View { + Image(systemName: buttonText) + .resizable() + .frame(width: width, height: height) + .foregroundColor(colorScheme == .dark ? Color.gray : Color.gray) + .opacity(isPressed ? 
0.5 : 1) + .gesture( + DragGesture(minimumDistance: 0) + .onChanged { _ in + if !self.isPressed { + self.isPressed = true + DispatchQueue.main.async { + if button == .home { + presentationMode.wrappedValue.dismiss() + appui.exit() + } else { + appui.virtualControllerButtonDown(button: button, controllerid: id) + Haptics.shared.play(.heavy) + } + } + } + } + .onEnded { _ in + self.isPressed = false + DispatchQueue.main.async { + if button != .home { + appui.virtualControllerButtonUp(button: button, controllerid: id) + } + } + } + ) + .onAppear() { + if button == .triggerL || button == .triggerZL || button == .triggerZR || button == .triggerR { + width = 65 + } + + + if button == .minus || button == .plus || button == .home { + width = 35 + height = 35 + } + } + } + + private var buttonText: String { + switch button { + case .A: return "a.circle.fill" + case .B: return "b.circle.fill" + case .X: return "x.circle.fill" + case .Y: return "y.circle.fill" + case .directionalPadUp: return "arrowtriangle.up.circle.fill" + case .directionalPadDown: return "arrowtriangle.down.circle.fill" + case .directionalPadLeft: return "arrowtriangle.left.circle.fill" + case .directionalPadRight: return "arrowtriangle.right.circle.fill" + case .triggerZL: return"zl.rectangle.roundedtop.fill" + case .triggerZR: return "zr.rectangle.roundedtop.fill" + case .triggerL: return "l.rectangle.roundedbottom.fill" + case .triggerR: return "r.rectangle.roundedbottom.fill" + case .plus: return "plus.circle.fill" + case .minus: return "minus.circle.fill" + case .home: return "house.circle.fill" + default: return "" + } + } +} diff --git a/src/ios/CoreSettingsView.swift b/src/ios/CoreSettingsView.swift new file mode 100644 index 0000000000..12b5c3c624 --- /dev/null +++ b/src/ios/CoreSettingsView.swift @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// 
SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import Foundation +import AppUI + +struct CoreSettingsView: View { + @State private var text: String = "" + @State private var isLoading: Bool = true + @Environment(\.presentationMode) var presentationMode + + var body: some View { + VStack { + if isLoading { + ProgressView() + .progressViewStyle(CircularProgressViewStyle()) + } else { + TextEditor(text: $text) + .padding() + + } + } + .toolbar { + ToolbarItem(placement: .navigationBarTrailing) { + Button { + let documentDirectory = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] + let configfolder = documentDirectory.appendingPathComponent("config", conformingTo: .folder) + let fileURL = configfolder.appendingPathComponent("config.ini") + + presentationMode.wrappedValue.dismiss() + + do { + try FileManager.default.removeItem(at: fileURL) + } catch { + print("\(error.localizedDescription)") + } + + AppUI.shared.settingsSaved() + + } label: { + Text("Reset File") + } + } + } + .onAppear { + loadFile() + } + .onDisappear() { + saveFile() + } + } + + private func loadFile() { + let fileManager = FileManager.default + let documentDirectory = fileManager.urls(for: .documentDirectory, in: .userDomainMask)[0] + let configfolder = documentDirectory.appendingPathComponent("config", conformingTo: .folder) + let fileURL = configfolder.appendingPathComponent("config.ini") + + if fileManager.fileExists(atPath: fileURL.path) { + do { + text = try String(contentsOf: fileURL, encoding: .utf8) + } catch { + print("Error reading file: \(error)") + } + } else { + text = "" // Initialize with empty text if file doesn't exist + } + isLoading = false + } + + private func saveFile() { + let fileManager = FileManager.default + let documentDirectory = fileManager.urls(for: .documentDirectory, in: .userDomainMask)[0] + let configfolder = documentDirectory.appendingPathComponent("config", conformingTo: .folder) + let fileURL = 
configfolder.appendingPathComponent("config.ini") + + do { + try text.write(to: fileURL, atomically: true, encoding: .utf8) + AppUI.shared.settingsSaved() + print("File saved successfully!") + } catch { + print("Error saving file: \(error)") + } + } +} diff --git a/src/ios/DetectServer.swift b/src/ios/DetectServer.swift new file mode 100644 index 0000000000..a36563fc66 --- /dev/null +++ b/src/ios/DetectServer.swift @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import Foundation + +func isSideJITServerDetected(completion: @escaping (Result) -> Void) { + let address = UserDefaults.standard.string(forKey: "sidejitserver") ?? "" + var SJSURL = address + if (address).isEmpty { + SJSURL = "http://sidejitserver._http._tcp.local:8080" + } + // Create a network operation at launch to Refresh SideJITServer + let url = URL(string: SJSURL)! 
+ let task = URLSession.shared.dataTask(with: url) { (data, response, error) in + if let error = error { + print("No SideJITServer on Network") + completion(.failure(error)) + return + } + completion(.success(())) + } + task.resume() + return +} diff --git a/src/ios/EmulationGame.swift b/src/ios/EmulationGame.swift new file mode 100644 index 0000000000..da06753279 --- /dev/null +++ b/src/ios/EmulationGame.swift @@ -0,0 +1,31 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import Foundation + +struct EmulationGame : Comparable, Hashable, Identifiable { + var id = UUID() + + let developer: String + let fileURL: URL + let imageData: Data + let title: String + + func hash(into hasher: inout Hasher) { + hasher.combine(id) + hasher.combine(developer) + hasher.combine(fileURL) + hasher.combine(imageData) + hasher.combine(title) + } + + static func < (lhs: EmulationGame, rhs: EmulationGame) -> Bool { // games order by title + lhs.title < rhs.title + } + + static func == (lhs: EmulationGame, rhs: EmulationGame) -> Bool { // equality is by title only + lhs.title == rhs.title + } +} diff --git a/src/ios/EmulationHandler.swift b/src/ios/EmulationHandler.swift new file mode 100644 index 0000000000..9ae1e2f248 --- /dev/null +++ b/src/ios/EmulationHandler.swift @@ -0,0 +1,96 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import AppUI +import Metal +import Foundation + +class EmulationViewModel: ObservableObject { + @Published var isShowingCustomButton = true + var should = false + var device: MTLDevice? + var mtkView: MTKView = MTKView() // plain stored property: @State only works inside a SwiftUI View + var CaLayer: CAMetalLayer? + private var sudachiGame: EmulationGame? + private let appui = AppUI.shared + private var thread: Thread! 
+ private var isRunning = false + var doesneedresources = false + var iscustom: Bool = false // plain stored property: @State only works inside a SwiftUI View + + init(game: EmulationGame?) { + self.device = MTLCreateSystemDefaultDevice() + self.sudachiGame = game + } + + func configureAppUI(with mtkView: MTKView) { + self.mtkView = mtkView + device = self.mtkView.device + guard !isRunning else { return } + isRunning = true + appui.configure(layer: mtkView.layer as! CAMetalLayer, with: mtkView.frame.size) + + iscustom = (sudachiGame?.fileURL.startAccessingSecurityScopedResource() == true) // true only when access was actually granted + + DispatchQueue.global(qos: .userInitiated).async { [self] in + if let sudachiGame = self.sudachiGame { + self.appui.insert(game: sudachiGame.fileURL) + } else { + self.appui.bootOS() + } + } + + thread = .init(block: self.step) + thread.name = "Yuzu" + thread.qualityOfService = .userInteractive + thread.threadPriority = 0.9 + thread.start() + } + + private func step() { + while !Thread.current.isCancelled { // stopEmulation() cancels this thread; cancel() only sets a flag + appui.step() + } + } + + func customButtonTapped() { + stopEmulation() + } + + private func stopEmulation() { + if isRunning { + isRunning = false + appui.exit() + thread.cancel() + if iscustom { + sudachiGame?.fileURL.stopAccessingSecurityScopedResource() + } + } + } + + func handleOrientationChange() { + DispatchQueue.main.async { [weak self] in + guard let self = self else { return } + let interfaceOrientation = self.getInterfaceOrientation(from: UIDevice.current.orientation) + self.appui.orientationChanged(orientation: interfaceOrientation, with: self.mtkView.layer as! 
CAMetalLayer, size: mtkView.frame.size) + } + } + + private func getInterfaceOrientation(from deviceOrientation: UIDeviceOrientation) -> UIInterfaceOrientation { + switch deviceOrientation { + case .portrait: + return .portrait + case .portraitUpsideDown: + return .portraitUpsideDown + case .landscapeLeft: + return .landscapeRight + case .landscapeRight: + return .landscapeLeft + default: + return .unknown + } + } +} diff --git a/src/ios/EmulationScreenView.swift b/src/ios/EmulationScreenView.swift new file mode 100644 index 0000000000..9db9d5abba --- /dev/null +++ b/src/ios/EmulationScreenView.swift @@ -0,0 +1,133 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import AppUI +import MetalKit + +class EmulationScreenView: UIView { + var primaryScreen: UIView! + var portraitconstraints = [NSLayoutConstraint]() + var landscapeconstraints = [NSLayoutConstraint]() + var fullscreenconstraints = [NSLayoutConstraint]() + let appui = AppUI.shared + let userDefaults = UserDefaults.standard + + override init(frame: CGRect) { + super.init(frame: frame) + if UIDevice.current.userInterfaceIdiom == .pad { + setupAppUIScreenforiPad() + } else { + setupAppUIScreen() + } + } + + required init?(coder: NSCoder) { + super.init(coder: coder) + if UIDevice.current.userInterfaceIdiom == .pad { + setupAppUIScreenforiPad() + } else { + setupAppUIScreen() + } + + } + + override func touchesBegan(_ touches: Set, with event: UIEvent?) { + super.touchesBegan(touches, with: event) + guard let touch = touches.first else { + return + } + + print("Location: \(touch.location(in: primaryScreen))") + appui.touchBegan(at: touch.location(in: primaryScreen), for: 0) + } + + override func touchesEnded(_ touches: Set, with event: UIEvent?) 
{ + super.touchesEnded(touches, with: event) + print("Touch Ended") + appui.touchEnded(for: 0) + } + + override func touchesMoved(_ touches: Set, with event: UIEvent?) { + super.touchesMoved(touches, with: event) + guard let touch = touches.first else { + return + } + let location = touch.location(in: primaryScreen) + print("Location Moved: \(location)") + appui.touchMoved(at: location, for: 0) + } + + func setupAppUIScreenforiPad() { + primaryScreen = MTKView(frame: .zero, device: MTLCreateSystemDefaultDevice()) + primaryScreen.translatesAutoresizingMaskIntoConstraints = false + primaryScreen.clipsToBounds = true + primaryScreen.layer.borderColor = UIColor.secondarySystemBackground.cgColor + primaryScreen.layer.borderWidth = 3 + primaryScreen.layer.cornerCurve = .continuous + primaryScreen.layer.cornerRadius = 10 + addSubview(primaryScreen) + + + portraitconstraints = [ + primaryScreen.topAnchor.constraint(equalTo: safeAreaLayoutGuide.topAnchor, constant: 10), + primaryScreen.leadingAnchor.constraint(equalTo: safeAreaLayoutGuide.leadingAnchor, constant: 10), + primaryScreen.trailingAnchor.constraint(equalTo: safeAreaLayoutGuide.trailingAnchor, constant: -10), + primaryScreen.heightAnchor.constraint(equalTo: primaryScreen.widthAnchor, multiplier: 9 / 16), + ] + + landscapeconstraints = [ + primaryScreen.topAnchor.constraint(equalTo: safeAreaLayoutGuide.topAnchor, constant: 50), + primaryScreen.bottomAnchor.constraint(equalTo: safeAreaLayoutGuide.bottomAnchor, constant: -100), + primaryScreen.widthAnchor.constraint(equalTo: primaryScreen.heightAnchor, multiplier: 16 / 9), + primaryScreen.centerXAnchor.constraint(equalTo: safeAreaLayoutGuide.centerXAnchor), + ] + + + updateConstraintsForOrientation() + } + + + + func setupAppUIScreen() { + primaryScreen = MTKView(frame: .zero, device: MTLCreateSystemDefaultDevice()) + primaryScreen.translatesAutoresizingMaskIntoConstraints = false + primaryScreen.clipsToBounds = true + primaryScreen.layer.borderColor = 
UIColor.secondarySystemBackground.cgColor + primaryScreen.layer.borderWidth = 3 + primaryScreen.layer.cornerCurve = .continuous + primaryScreen.layer.cornerRadius = 10 + addSubview(primaryScreen) + + + portraitconstraints = [ + primaryScreen.topAnchor.constraint(equalTo: safeAreaLayoutGuide.topAnchor, constant: 10), + primaryScreen.leadingAnchor.constraint(equalTo: safeAreaLayoutGuide.leadingAnchor, constant: 10), + primaryScreen.trailingAnchor.constraint(equalTo: safeAreaLayoutGuide.trailingAnchor, constant: -10), + primaryScreen.heightAnchor.constraint(equalTo: primaryScreen.widthAnchor, multiplier: 9 / 16), + ] + + landscapeconstraints = [ + primaryScreen.topAnchor.constraint(equalTo: safeAreaLayoutGuide.topAnchor, constant: 10), + primaryScreen.bottomAnchor.constraint(equalTo: safeAreaLayoutGuide.bottomAnchor, constant: -10), + primaryScreen.widthAnchor.constraint(equalTo: primaryScreen.heightAnchor, multiplier: 16 / 9), + primaryScreen.centerXAnchor.constraint(equalTo: safeAreaLayoutGuide.centerXAnchor), + ] + + updateConstraintsForOrientation() + } + + override func layoutSubviews() { + super.layoutSubviews() + updateConstraintsForOrientation() + } + + private func updateConstraintsForOrientation() { + removeConstraints(portraitconstraints) + removeConstraints(landscapeconstraints) + let isPortrait = UIApplication.shared.statusBarOrientation.isPortrait + addConstraints(isPortrait ? 
portraitconstraints : landscapeconstraints) + } +} diff --git a/src/ios/EmulationSession.h b/src/ios/EmulationSession.h new file mode 100644 index 0000000000..c6bce5e4dd --- /dev/null +++ b/src/ios/EmulationSession.h @@ -0,0 +1,103 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#import +#import + +#if __has_include() +#import +#else +#import +#endif +#import "EmulationWindow.h" + +#include "common/detached_tasks.h" +#include "core/core.h" +#include "core/file_sys/registered_cache.h" +#include "core/hle/service/acc/profile_manager.h" +#include "core/perf_stats.h" +#include "frontend_common/content_manager.h" +#include "video_core/rasterizer_interface.h" + +class EmulationSession final { +public: + explicit EmulationSession(); + ~EmulationSession() = default; + + static EmulationSession& GetInstance(); + const Core::System& System() const; + Core::System& System(); + FileSys::ManualContentProvider* GetContentProvider(); + InputCommon::InputSubsystem& GetInputSubsystem(); + + const EmulationWindow& Window() const; + EmulationWindow& Window(); + CAMetalLayer* NativeWindow() const; + void SetNativeWindow(CAMetalLayer* native_window, CGSize size); + void SurfaceChanged(); + + void InitializeGpuDriver(); + + bool IsRunning() const; + bool IsPaused() const; + void PauseEmulation(); + void UnPauseEmulation(); + void HaltEmulation(); + void RunEmulation(); + void ShutdownEmulation(); + + const Core::PerfStatsResults& PerfStats(); + void ConfigureFilesystemProvider(const std::string& filepath); + void InitializeSystem(bool reload); + void SetAppletId(int applet_id); + Core::SystemResultStatus InitializeEmulation(const std::string& filepath, + const std::size_t program_index, + const bool frontend_initiated); + Core::SystemResultStatus BootOS(); + + static void OnEmulationStarted(); + 
static u64 GetProgramId(std::string programId); + bool IsInitialized() { return is_initialized; }; + + bool IsHandheldOnly(); + void SetDeviceType([[maybe_unused]] int index, int type); + void OnGamepadConnectEvent([[maybe_unused]] int index); + void OnGamepadDisconnectEvent([[maybe_unused]] int index); +private: + static void LoadDiskCacheProgress(VideoCore::LoadCallbackStage stage, int progress, int max); + static void OnEmulationStopped(Core::SystemResultStatus result); + static void ChangeProgram(std::size_t program_index); + +private: + // Window management + std::unique_ptr m_window; + CAMetalLayer* m_native_window{}; + + // Core emulation + Core::System m_system; + InputCommon::InputSubsystem m_input_subsystem; + Common::DetachedTasks m_detached_tasks; + Core::PerfStatsResults m_perf_stats{}; + std::shared_ptr m_vfs; + Core::SystemResultStatus m_load_result{Core::SystemResultStatus::ErrorNotInitialized}; + std::atomic m_is_running = false; + std::atomic m_is_paused = false; + std::unique_ptr m_manual_provider; + int m_applet_id{1}; + + // GPU driver parameters + std::shared_ptr m_vulkan_library; + + // Synchronization + std::condition_variable_any m_cv; + mutable std::mutex m_mutex; + bool is_initialized = false; + CGSize m_size; + + // Program index for next boot + std::atomic m_next_program_index = -1; +}; diff --git a/src/ios/EmulationSession.mm b/src/ios/EmulationSession.mm new file mode 100644 index 0000000000..9861e0a544 --- /dev/null +++ b/src/ios/EmulationSession.mm @@ -0,0 +1,474 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#import "EmulationSession.h" + +#include + +#include +#include +#include +#include +#include + +#include "common/fs/fs.h" +#include "core/file_sys/patch_manager.h" +#include "core/file_sys/savedata_factory.h" +#include "core/loader/nro.h" +#include 
"frontend_common/content_manager.h" + +#include "common/detached_tasks.h" +#include "common/dynamic_library.h" +#include "common/fs/path_util.h" +#include "common/logging.h" +#include "common/scm_rev.h" +#include "common/scope_exit.h" +#include "common/settings.h" +#include "common/string_util.h" +#include "core/core.h" +#include "core/cpu_manager.h" +#include "core/crypto/key_manager.h" +#include "core/file_sys/card_image.h" +#include "core/file_sys/content_archive.h" +#include "core/file_sys/fs_filesystem.h" +#include "core/file_sys/submission_package.h" +#include "core/file_sys/vfs/vfs.h" +#include "core/file_sys/vfs/vfs_real.h" +#include "core/frontend/applets/cabinet.h" +#include "core/frontend/applets/controller.h" +#include "core/frontend/applets/error.h" +#include "core/frontend/applets/general.h" +#include "core/frontend/applets/mii_edit.h" +#include "core/frontend/applets/profile_select.h" +#include "core/frontend/applets/software_keyboard.h" +#include "core/frontend/applets/web_browser.h" +#include "core/hle/service/am/applet_manager.h" +#include "core/hle/service/am/frontend/applets.h" +#include "core/hle/service/filesystem/filesystem.h" +#include "core/loader/loader.h" +#include "frontend_common/config.h" +#include "hid_core/frontend/emulated_controller.h" +#include "hid_core/hid_core.h" +#include "hid_core/hid_types.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/vulkan_common/vulkan_instance.h" +#include "video_core/vulkan_common/vulkan_surface.h" + +#define jconst [[maybe_unused]] const auto +#define jauto [[maybe_unused]] auto + +static EmulationSession s_instance; + +EmulationSession::EmulationSession() { + m_vfs = std::make_shared(); +} + +EmulationSession& EmulationSession::GetInstance() { + return s_instance; +} + +const Core::System& EmulationSession::System() const { + return m_system; +} + +Core::System& EmulationSession::System() { + return m_system; +} + 
+FileSys::ManualContentProvider* EmulationSession::GetContentProvider() { + return m_manual_provider.get(); +} + +InputCommon::InputSubsystem& EmulationSession::GetInputSubsystem() { + return m_input_subsystem; +} + +const EmulationWindow& EmulationSession::Window() const { + return *m_window; +} + +EmulationWindow& EmulationSession::Window() { + return *m_window; +} + +CAMetalLayer* EmulationSession::NativeWindow() const { + return m_native_window; +} + +void EmulationSession::SetNativeWindow(CAMetalLayer* native_window, CGSize size) { + m_native_window = native_window; + m_size = size; +} + +void EmulationSession::InitializeGpuDriver() { + m_vulkan_library = std::make_shared(dlopen("@executable_path/Frameworks/MoltenVK", RTLD_NOW)); +} + +bool EmulationSession::IsRunning() const { + return m_is_running; +} + +bool EmulationSession::IsPaused() const { + return m_is_running && m_is_paused; +} + +const Core::PerfStatsResults& EmulationSession::PerfStats() { + m_perf_stats = m_system.GetAndResetPerfStats(); + return m_perf_stats; +} + +void EmulationSession::SurfaceChanged() { + if (!IsRunning()) { + return; + } + m_window->OnSurfaceChanged(m_native_window, m_size); +} + +void EmulationSession::ConfigureFilesystemProvider(const std::string& filepath) { + const auto file = m_system.GetFilesystem()->OpenFile(filepath, FileSys::OpenMode::Read); + if (!file) { + return; + } + + auto loader = Loader::GetLoader(m_system, file); + if (!loader) { + return; + } + + const auto file_type = loader->GetFileType(); + if (file_type == Loader::FileType::Unknown || file_type == Loader::FileType::Error) { + return; + } + + u64 program_id = 0; + const auto res2 = loader->ReadProgramId(program_id); + if (res2 == Loader::ResultStatus::Success && file_type == Loader::FileType::NCA) { + m_manual_provider->AddEntry(FileSys::TitleType::Application, + FileSys::GetCRTypeFromNCAType(FileSys::NCA{file}.GetType()), + program_id, file); + } else if (res2 == Loader::ResultStatus::Success && + 
(file_type == Loader::FileType::XCI || file_type == Loader::FileType::NSP)) { + const auto nsp = file_type == Loader::FileType::NSP + ? std::make_shared(file) + : FileSys::XCI{file}.GetSecurePartitionNSP(); + for (const auto& title : nsp->GetNCAs()) { + for (const auto& entry : title.second) { + m_manual_provider->AddEntry(entry.first.first, entry.first.second, title.first, + entry.second->GetBaseFile()); + } + } + } +} + +void EmulationSession::InitializeSystem(bool reload) { + if (!reload) { + SDL_SetMainReady(); + + // Initialize logging system + Common::Log::Initialize(); + Common::Log::SetColorConsoleBackendEnabled(true); + Common::Log::Start(); + } + + // Initialize filesystem. + m_system.SetFilesystem(m_vfs); + m_system.GetUserChannel().clear(); + m_manual_provider = std::make_unique(); + m_system.SetContentProvider(std::make_unique()); + m_system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::FrontendManual, + m_manual_provider.get()); + m_system.GetFileSystemController().CreateFactories(*m_vfs); + + is_initialized = true; +} + +void EmulationSession::SetAppletId(int applet_id) { + m_applet_id = applet_id; + m_system.GetFrontendAppletHolder().SetCurrentAppletId( + static_cast(m_applet_id)); +} + +Core::SystemResultStatus EmulationSession::InitializeEmulation(const std::string& filepath, + const std::size_t program_index, + const bool frontend_initiated) { + std::scoped_lock lock(m_mutex); + + // Create the render window. + m_window = std::make_unique(&m_input_subsystem, m_native_window, m_size, m_vulkan_library); + + // Initialize system. + m_system.SetShuttingDown(false); + m_system.ApplySettings(); + Settings::LogSettings(); + m_system.HIDCore().ReloadInputDevices(); + m_system.SetFrontendAppletSet(Service::AM::Frontend::FrontendAppletSet{}); + + // Initialize filesystem. + ConfigureFilesystemProvider(filepath); + + // Load the ROM. 
+ Service::AM::FrontendAppletParameters params{ + .applet_id = static_cast(m_applet_id), + .launch_type = frontend_initiated ? Service::AM::LaunchType::FrontendInitiated + : Service::AM::LaunchType::ApplicationInitiated, + .program_index = static_cast(program_index), + }; + m_load_result = m_system.Load(EmulationSession::GetInstance().Window(), filepath, params); + if (m_load_result != Core::SystemResultStatus::Success) { + return m_load_result; + } + + // Complete initialization. + m_system.GPU().Start(); + m_system.GetCpuManager().OnGpuReady(); + m_system.RegisterExitCallback([&] { HaltEmulation(); }); + + if (Settings::values.use_disk_shader_cache.GetValue()) { + m_system.Renderer().ReadRasterizer()->LoadDiskResources( + m_system.GetApplicationProcessProgramID(), std::stop_token{}, + [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); + } + + // Register an ExecuteProgram callback such that Core can execute a sub-program + m_system.RegisterExecuteProgramCallback([&](std::size_t program_index_) { + m_next_program_index = program_index_; + EmulationSession::GetInstance().HaltEmulation(); + ChangeProgram(m_next_program_index); + }); + + OnEmulationStarted(); + return Core::SystemResultStatus::Success; +} + + +Core::SystemResultStatus EmulationSession::BootOS() { + std::scoped_lock lock(m_mutex); + + // Create the render window. + m_window = std::make_unique(&m_input_subsystem, m_native_window, m_size, m_vulkan_library); + + // Initialize system. 
+    m_system.SetShuttingDown(false);
+    m_system.ApplySettings();
+    Settings::LogSettings();
+    m_system.HIDCore().ReloadInputDevices();
+    m_system.SetFrontendAppletSet(Service::AM::Frontend::FrontendAppletSet{});
+
+    // Look up the QLaunch program in the system NAND contents.
+    constexpr u64 QLaunchId = static_cast<u64>(Service::AM::AppletProgramId::QLaunch);
+    auto bis_system = m_system.GetFileSystemController().GetSystemNANDContents();
+    if (!bis_system) {
+        LOG_ERROR(Frontend, "System NAND contents are unavailable");
+        m_load_result = Core::SystemResultStatus::ErrorSystemFiles;
+        return m_load_result;
+    }
+
+    auto qlaunch_applet_nca = bis_system->GetEntry(QLaunchId, FileSys::ContentRecordType::Program);
+    if (!qlaunch_applet_nca) {
+        // Without this check the GetFullPath() call below dereferences a null pointer
+        // whenever the QLaunch entry is missing from the NAND.
+        LOG_ERROR(Frontend, "QLaunch was not found in the system NAND");
+        m_load_result = Core::SystemResultStatus::ErrorSystemFiles;
+        return m_load_result;
+    }
+
+    m_system.GetFrontendAppletHolder().SetCurrentAppletId(Service::AM::AppletId::QLaunch);
+
+    const auto filename = qlaunch_applet_nca->GetFullPath();
+
+    auto params = Service::AM::FrontendAppletParameters {
+        .program_id = QLaunchId,
+        .applet_id = Service::AM::AppletId::QLaunch,
+        .applet_type = Service::AM::AppletType::LibraryApplet
+    };
+
+    m_load_result = m_system.Load(EmulationSession::GetInstance().Window(), filename, params);
+
+    if (m_load_result != Core::SystemResultStatus::Success) {
+        return m_load_result;
+    }
+
+    // Complete initialization.
+    m_system.GPU().Start();
+    m_system.GetCpuManager().OnGpuReady();
+    m_system.RegisterExitCallback([&] { HaltEmulation(); });
+
+    if (Settings::values.use_disk_shader_cache.GetValue()) {
+        m_system.Renderer().ReadRasterizer()->LoadDiskResources(
+            m_system.GetApplicationProcessProgramID(), std::stop_token{},
+            [](VideoCore::LoadCallbackStage, size_t value, size_t total) {});
+    }
+
+    // Register an ExecuteProgram callback such that Core can execute a sub-program
+    m_system.RegisterExecuteProgramCallback([&](std::size_t program_index_) {
+        m_next_program_index = program_index_;
+        EmulationSession::GetInstance().HaltEmulation();
+    });
+
+    OnEmulationStarted();
+    return Core::SystemResultStatus::Success;
+}
+
+// Stops the running session: unloads input devices, shuts down the main emulated
+// process if one was loaded successfully, and destroys the render window.
+void EmulationSession::ShutdownEmulation() {
+    // A pending program switch must be handled BEFORE m_mutex is taken:
+    // ChangeProgram() calls HaltEmulation() and RunEmulation(), which both lock
+    // m_mutex. It is a non-recursive std::mutex, so calling them with the lock
+    // held would deadlock this thread on itself.
+    if (const auto pending_index = m_next_program_index.load(); pending_index != -1) {
+        ChangeProgram(static_cast<std::size_t>(pending_index));
+        m_next_program_index = -1;
+    }
+
+    std::scoped_lock lock(m_mutex);
+
+    m_is_running = false;
+
+    // Unload user input.
+    m_system.HIDCore().UnloadInputDevices();
+
+    // Enable all controllers
+    m_system.HIDCore().SetSupportedStyleTag({Core::HID::NpadStyleSet::All});
+
+    // Shutdown the main emulated process
+    if (m_load_result == Core::SystemResultStatus::Success) {
+        m_system.DetachDebugger();
+        m_system.ShutdownMainProcess();
+        m_detached_tasks.WaitForAllTasks();
+        m_load_result = Core::SystemResultStatus::ErrorNotInitialized;
+        m_window.reset();
+        OnEmulationStopped(Core::SystemResultStatus::Success);
+        return;
+    }
+
+    // Tear down the render window.
+    m_window.reset();
+}
+
+// Pauses the core and records the paused state.
+void EmulationSession::PauseEmulation() {
+    std::scoped_lock lock(m_mutex);
+    m_system.Pause();
+    m_is_paused = true;
+}
+
+// Resumes the core and clears the paused state.
+void EmulationSession::UnPauseEmulation() {
+    std::scoped_lock lock(m_mutex);
+    m_system.Run();
+    m_is_paused = false;
+}
+
+// Clears the running flag and wakes the thread blocked in RunEmulation().
+void EmulationSession::HaltEmulation() {
+    std::scoped_lock lock(m_mutex);
+    m_is_running = false;
+    m_cv.notify_one();
+}
+
+// Starts the core and blocks the calling thread until the running flag is cleared.
+void EmulationSession::RunEmulation() {
+    {
+        std::scoped_lock lock(m_mutex);
+        m_is_running = true;
+    }
+
+    // Load the disk shader cache.
+    if (Settings::values.use_disk_shader_cache.GetValue()) {
+        LoadDiskCacheProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
+        m_system.Renderer().ReadRasterizer()->LoadDiskResources(
+            m_system.GetApplicationProcessProgramID(), std::stop_token{}, LoadDiskCacheProgress);
+        LoadDiskCacheProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
+    }
+
+    void(m_system.Run());
+
+    if (m_system.DebuggerEnabled()) {
+        m_system.InitializeDebugger();
+    }
+
+    // Wait in 800 ms slices rather than indefinitely: ShutdownEmulation() clears
+    // m_is_running without notifying m_cv, so the periodic re-check is what
+    // notices that path.
+    while (true) {
+        {
+            std::unique_lock lock(m_mutex);
+            if (m_cv.wait_for(lock, std::chrono::milliseconds(800),
+                              [&]() { return !m_is_running; })) {
+                // Emulation halted.
+                break;
+            }
+        }
+    }
+
+    // Reset current applet ID.
+ m_applet_id = static_cast(Service::AM::AppletId::Application); +} + +void EmulationSession::LoadDiskCacheProgress(VideoCore::LoadCallbackStage stage, int progress, + int max) { + +} + +void EmulationSession::OnEmulationStarted() { + +} + +void EmulationSession::OnEmulationStopped(Core::SystemResultStatus result) { + +} + +void EmulationSession::ChangeProgram(std::size_t program_index) { + LOG_INFO(Frontend, "Trying To Switch Program"); + // Halt current emulation session + EmulationSession::GetInstance().HaltEmulation(); + // Save the current state if necessary + + // Shutdown the current emulation session cleanly + // Update the program index + EmulationSession::GetInstance().m_next_program_index = program_index; + + // Initialize the new program + // Start the new emulation session + EmulationSession::GetInstance().RunEmulation(); +} + +u64 EmulationSession::GetProgramId(std::string programId) { + try { + return std::stoull(programId); + } catch (...) { + return 0; + } +} + +bool EmulationSession::IsHandheldOnly() { + jconst npad_style_set = m_system.HIDCore().GetSupportedStyleTag(); + + if (npad_style_set.fullkey == 1) { + return false; + } + + if (npad_style_set.handheld == 0) { + return false; + } + + return !Settings::IsDockedMode(); +} + +void EmulationSession::SetDeviceType([[maybe_unused]] int index, int type) { + jauto controller = m_system.HIDCore().GetEmulatedControllerByIndex(index); + controller->SetNpadStyleIndex(static_cast(type)); +} + +void EmulationSession::OnGamepadConnectEvent([[maybe_unused]] int index) { + jauto controller = m_system.HIDCore().GetEmulatedControllerByIndex(index); + + // Ensure that player1 is configured correctly and handheld disconnected + if (controller->GetNpadIdType() == Core::HID::NpadIdType::Player1) { + jauto handheld = m_system.HIDCore().GetEmulatedController(Core::HID::NpadIdType::Handheld); + + if (controller->GetNpadStyleIndex() == Core::HID::NpadStyleIndex::Handheld) { + 
handheld->SetNpadStyleIndex(Core::HID::NpadStyleIndex::Fullkey); + controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::Fullkey); + handheld->Disconnect(); + } + } + + // Ensure that handheld is configured correctly and player 1 disconnected + if (controller->GetNpadIdType() == Core::HID::NpadIdType::Handheld) { + jauto player1 = m_system.HIDCore().GetEmulatedController(Core::HID::NpadIdType::Player1); + + if (controller->GetNpadStyleIndex() != Core::HID::NpadStyleIndex::Handheld) { + player1->SetNpadStyleIndex(Core::HID::NpadStyleIndex::Handheld); + controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::Handheld); + player1->Disconnect(); + } + } + + if (!controller->IsConnected()) { + controller->Connect(); + } +} + +void EmulationSession::OnGamepadDisconnectEvent([[maybe_unused]] int index) { + jauto controller = m_system.HIDCore().GetEmulatedControllerByIndex(index); + controller->Disconnect(); +} diff --git a/src/ios/EmulationView.swift b/src/ios/EmulationView.swift new file mode 100644 index 0000000000..586afa88c1 --- /dev/null +++ b/src/ios/EmulationView.swift @@ -0,0 +1,137 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import AppUI +import Foundation +import GameController +import UIKit +import SwiftUIIntrospect + +struct EmulationView: View { + @StateObject private var viewModel: EmulationViewModel + @State var controllerconnected = false + @State var appui = AppUI.shared + var device: MTLDevice? = MTLCreateSystemDefaultDevice() + @State var CaLayer: CAMetalLayer? + @State var ShowPopup: Bool = false + @State var mtkview: MTKView? + @State private var thread: Thread! + @State var uiTabBarController: UITabBarController? + @State private var isFirstFrameShown = false + @State private var timer: Timer? 
+    @Environment(\.scenePhase) var scenePhase
+
+    /// Builds the view and its backing view model for `game`.
+    init(game: EmulationGame?) {
+        _viewModel = StateObject(wrappedValue: EmulationViewModel(game: game))
+    }
+
+    var body: some View {
+        ZStack {
+            MetalView(device: device) { view in
+                DispatchQueue.main.async {
+                    if let metalView = view as? MTKView {
+                        mtkview = metalView
+                        viewModel.configureAppUI(with: metalView)
+                    } else {
+                        print("Error: view is not of type MTKView")
+                    }
+                }
+            }
+            .onRotate { _ in
+                viewModel.handleOrientationChange()
+            }
+            ControllerView()
+        }
+        .overlay(
+            // Loading screen overlay on top of MetalView
+            Group {
+                if !isFirstFrameShown {
+                    LoadingView()
+                }
+            }
+            .transition(.opacity)
+        )
+        .onAppear {
+            // Keep the screen awake while emulation is on screen.
+            UIApplication.shared.isIdleTimerDisabled = true
+            startPollingFirstFrameShowed()
+        }
+        .onDisappear {
+            stopPollingFirstFrameShowed()
+            // Undo the onAppear override so the device can auto-lock again
+            // once the user has left the emulation screen.
+            UIApplication.shared.isIdleTimerDisabled = false
+            uiTabBarController?.tabBar.isHidden = false
+            viewModel.customButtonTapped()
+        }
+        .navigationBarBackButtonHidden(true)
+        .introspect(.tabView, on: .iOS(.v13, .v14, .v15, .v16, .v17)) { (tabBarController) in
+            tabBarController.tabBar.isHidden = true
+            uiTabBarController = tabBarController
+        }
+    }
+
+    /// Polls every 0.1 s until `FirstFrameShowed()` reports true, then hides the loading overlay.
+    private func startPollingFirstFrameShowed() {
+        // onAppear can fire more than once; drop any timer that is still
+        // scheduled so only one poller is ever active.
+        timer?.invalidate()
+        timer = Timer.scheduledTimer(withTimeInterval: 0.1, repeats: true) { _ in
+            if appui.FirstFrameShowed() {
+                withAnimation {
+                    isFirstFrameShown = true
+                }
+                stopPollingFirstFrameShowed()
+            }
+        }
+    }
+
+    /// Cancels the first-frame poller, if any.
+    private func stopPollingFirstFrameShowed() {
+        timer?.invalidate()
+        timer = nil
+        print("Timer Invalidated")
+    }
+}
+
+/// Full-screen, semi-transparent placeholder shown until the first frame is presented.
+struct LoadingView: View {
+    var body: some View {
+        VStack {
+            ProgressView("Loading...")
+                // .font(.system(size: 90))
+                .progressViewStyle(CircularProgressViewStyle())
+                .padding()
+            Text("Please wait while the game loads.")
+        }
+        .frame(maxWidth: .infinity, maxHeight: .infinity)
+        .background(Color.black.opacity(0.8))
+        .foregroundColor(.white)
+    }
+}
+
+extension View {
+    /// Runs `action` with the new size when the view's size changes (see `DeviceRotationModifier`).
+    func onRotate(perform action: @escaping (CGSize) -> Void) -> some View {
+        self.modifier(DeviceRotationModifier(action:
action)) + } +} + +struct DeviceRotationModifier: ViewModifier { + let action: (CGSize) -> Void + @State var startedfirst: Bool = false + + func body(content: Content) -> some View { content + .background(GeometryReader { geometry in + Color.clear + .preference(key: SizePreferenceKey.self, value: geometry.size) + }) + .onPreferenceChange(SizePreferenceKey.self) { newSize in + if startedfirst { + action(newSize) + } else { + startedfirst = true + } + } + } +} + +struct SizePreferenceKey: PreferenceKey { + static var defaultValue: CGSize = .zero + + static func reduce(value: inout CGSize, nextValue: () -> CGSize) { + value = nextValue() + } +} diff --git a/src/ios/EmulationWindow.h b/src/ios/EmulationWindow.h new file mode 100644 index 0000000000..8d285ab33e --- /dev/null +++ b/src/ios/EmulationWindow.h @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#if __has_include() +#import +#else +#import +#endif +#import +#import +#import +#import + +#include +#include + +#include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" +#include "input_common/main.h" + +class GraphicsContext_Apple final : public Core::Frontend::GraphicsContext { +public: + explicit GraphicsContext_Apple(std::shared_ptr driver_library) + : m_driver_library{driver_library} {} + + ~GraphicsContext_Apple() = default; + + std::shared_ptr GetDriverLibrary() override { + return m_driver_library; + } + +private: + std::shared_ptr m_driver_library; +}; + +NS_ASSUME_NONNULL_BEGIN + +class EmulationWindow final : public Core::Frontend::EmuWindow { +public: + EmulationWindow(InputCommon::InputSubsystem* input_subsystem, CAMetalLayer* surface, CGSize size, + std::shared_ptr driver_library); + + ~EmulationWindow() = default; + + void OnSurfaceChanged(CAMetalLayer* surface, CGSize size); + 
void OrientationChanged(UIInterfaceOrientation orientation); + void OnFrameDisplayed() override; + + void OnTouchPressed(int id, float x, float y); + void OnTouchMoved(int id, float x, float y); + void OnTouchReleased(int id); + + void OnGamepadButtonEvent(int player_index, int button_id, bool pressed); + void OnGamepadJoystickEvent(int player_index, int stick_id, float x, float y); + void OnGamepadMotionEvent(int player_index, u64 delta_timestamp, float gyro_x, float gyro_y, float gyro_z, float accel_x, float accel_y, float accel_z); + + std::unique_ptr CreateSharedContext() const override { + return {std::make_unique(m_driver_library)}; + } + + + bool HasFirstFrame() const { + return m_first_frame; + } + + bool IsShown() const override { + return true; + }; + +private: + float m_window_width{}; + float m_window_height{}; + CGSize m_size; + bool is_portrait = true; + + InputCommon::InputSubsystem* m_input_subsystem{}; + std::shared_ptr m_driver_library; + + bool m_first_frame = false; +}; + +NS_ASSUME_NONNULL_END diff --git a/src/ios/EmulationWindow.mm b/src/ios/EmulationWindow.mm new file mode 100644 index 0000000000..b917929c2c --- /dev/null +++ b/src/ios/EmulationWindow.mm @@ -0,0 +1,82 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Jarrod Norwell +// SPDX-License-Identifier: GPL-2.0-or-later + +#import "EmulationWindow.h" +#import "EmulationSession.h" + +#include + +#include "common/logging.h" +#include "input_common/drivers/touch_screen.h" +#include "input_common/drivers/virtual_amiibo.h" +#include "input_common/drivers/virtual_gamepad.h" +#include "input_common/main.h" + +void EmulationWindow::OnSurfaceChanged(CAMetalLayer* surface, CGSize size) { + m_size = size; + + m_window_width = size.width; + m_window_height = size.height; + + // Ensures that we emulate with the correct aspect ratio. 
+    // UpdateCurrentFramebufferLayout(m_window_width, m_window_height);
+
+    window_info.render_surface = (__bridge void *)surface;
+    window_info.render_surface_scale = [[UIScreen mainScreen] nativeScale];
+}
+
+// Records whether the interface is currently in a portrait orientation.
+void EmulationWindow::OrientationChanged(UIInterfaceOrientation orientation) {
+    // UIInterfaceOrientationIsPortrait also covers PortraitUpsideDown, which a
+    // plain comparison against UIInterfaceOrientationPortrait reports as landscape.
+    is_portrait = UIInterfaceOrientationIsPortrait(orientation);
+}
+
+// Touch handlers: map view coordinates to touch-screen coordinates and forward
+// them to the touch screen driver. They use m_input_subsystem, like the gamepad
+// handlers below, instead of reaching through the EmulationSession singleton.
+void EmulationWindow::OnTouchPressed(int id, float x, float y) {
+    const auto [touch_x, touch_y] = MapToTouchScreen(x, y);
+    m_input_subsystem->GetTouchScreen()->TouchPressed(touch_x, touch_y, id);
+}
+
+void EmulationWindow::OnTouchMoved(int id, float x, float y) {
+    const auto [touch_x, touch_y] = MapToTouchScreen(x, y);
+    m_input_subsystem->GetTouchScreen()->TouchMoved(touch_x, touch_y, id);
+}
+
+void EmulationWindow::OnTouchReleased(int id) {
+    m_input_subsystem->GetTouchScreen()->TouchReleased(id);
+}
+
+// Gamepad handlers: forward button, stick and motion state to the virtual gamepad driver.
+void EmulationWindow::OnGamepadButtonEvent(int player_index, int button_id, bool pressed) {
+    m_input_subsystem->GetVirtualGamepad()->SetButtonState(player_index, button_id, pressed);
+}
+
+void EmulationWindow::OnGamepadJoystickEvent(int player_index, int stick_id, float x, float y) {
+    m_input_subsystem->GetVirtualGamepad()->SetStickPosition(player_index, stick_id, x, y);
+}
+
+void EmulationWindow::OnGamepadMotionEvent(int player_index, u64 delta_timestamp, float gyro_x, float gyro_y, float gyro_z, float accel_x, float accel_y, float accel_z) {
+    m_input_subsystem->GetVirtualGamepad()->SetMotionState(player_index, delta_timestamp, gyro_x, gyro_y, gyro_z, accel_x, accel_y, accel_z);
+}
+
+// Latches the flag returned by HasFirstFrame(); it is never cleared again.
+void EmulationWindow::OnFrameDisplayed() {
+    m_first_frame = true;
+}
+
+EmulationWindow::EmulationWindow(InputCommon::InputSubsystem* input_subsystem, CAMetalLayer* surface, CGSize size, std::shared_ptr<Common::DynamicLibrary> driver_library)
+    : m_window_width{}, m_window_height{}, m_size{size}, is_portrait{true},
m_input_subsystem{input_subsystem}, m_driver_library{driver_library}, m_first_frame{false} {
+    LOG_INFO(Frontend, "initializing");
+
+    if (!surface) {
+        LOG_CRITICAL(Frontend, "surface is nullptr");
+        return;
+    }
+
+    OnSurfaceChanged(surface, m_size);
+    window_info.render_surface_scale = [[UIScreen mainScreen] nativeScale];
+    window_info.type = Core::Frontend::WindowSystemType::Cocoa;
+
+    m_input_subsystem->Initialize();
+}
diff --git a/src/ios/EnableJIT.swift b/src/ios/EnableJIT.swift
new file mode 100644
index 0000000000..105615fd29
--- /dev/null
+++ b/src/ios/EnableJIT.swift
@@ -0,0 +1,60 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+import Foundation
+
+/// Failure reasons reported by the SideJITServer request helpers below.
+enum SideJITServerErrorType: Error {
+    case invalidURL
+    case errorConnecting
+    case deviceNotFound
+    case other(String)
+}
+
+/// Sends a GET request to `urlString` and maps the response body to a `Result`.
+///
+/// - Parameters:
+///   - urlString: Address of the endpoint to query.
+///   - successBody: Exact response body that counts as success.
+///   - completion: Called exactly once, from the `URLSession` completion handler or directly on an invalid URL.
+private func sendSideJITRequest(to urlString: String,
+                                expecting successBody: String,
+                                completion: @escaping (Result<Void, SideJITServerErrorType>) -> Void) {
+    // A malformed address is reported to the caller instead of crashing on a force-unwrap.
+    guard let url = URL(string: urlString) else {
+        completion(.failure(.invalidURL))
+        return
+    }
+    let task = URLSession.shared.dataTask(with: url) { data, _, error in
+        if error != nil {
+            completion(.failure(.errorConnecting))
+            return
+        }
+        // A missing or non-UTF-8 body must still complete, otherwise the caller waits forever.
+        guard let data = data, let body = String(data: data, encoding: .utf8) else {
+            completion(.failure(.other("Invalid response from server")))
+            return
+        }
+        switch body {
+        case successBody:
+            completion(.success(()))
+        case "Could not find device!":
+            completion(.failure(.deviceNotFound))
+        default:
+            completion(.failure(.other(body)))
+        }
+    }
+    task.resume()
+}
+
+/// Requests `url` and succeeds when the server answers "Enabled JIT".
+func sendrequestsidejit(url: String, completion: @escaping (Result<Void, SideJITServerErrorType>) -> Void) {
+    sendSideJITRequest(to: url, expecting: "Enabled JIT", completion: completion)
+}
+
+/// Requests `url` and succeeds when the server answers with its "Refreshed!" JSON payload.
+func sendrefresh(url: String, completion: @escaping (Result<Void, SideJITServerErrorType>) -> Void) {
+    sendSideJITRequest(to: url, expecting: "{\"OK\":\"Refreshed!\"}", completion: completion)
+}
diff --git a/src/ios/FileManager.swift b/src/ios/FileManager.swift
new file mode 100644
index 0000000000..f3fddff8b1
--- /dev/null
+++ b/src/ios/FileManager.swift
@@ -0,0 +1,254 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+import SwiftUI
+import Foundation
+import UIKit
+import AppUI
+import Zip
+
+struct Core : Comparable, Hashable {
+
+    let name = "Yuzu"
+    var games: [EmulationGame]
+    let root: URL
+
+    static func < (lhs: Core, rhs: Core) -> Bool {
+        lhs.name < rhs.name
+    }
+
+    func AddFirmware(at fileURL: URL) {
+        do {
+            let fileManager = FileManager.default
+            let documentsDirectory = fileManager.urls(for: .documentDirectory, in: .userDomainMask).first!
+            let destinationURL = documentsDirectory.appendingPathComponent("nand/system/Contents/registered")
+
+
+            if !fileManager.fileExists(atPath: destinationURL.path) {
+                try fileManager.createDirectory(at: destinationURL, withIntermediateDirectories: true, attributes: nil)
+            }
+
+
+            try Zip.unzipFile(fileURL, destination: destinationURL, overwrite: true, password: nil)
+            print("File unzipped successfully to \(destinationURL.path)")
+
+        } catch {
+            print("Failed to unzip file: \(error)")
+        }
+    }
+}
+
+/// Creates and inspects the emulator's folders inside the app's Documents directory.
+class YuzuFileManager {
+    static var shared = YuzuFileManager()
+
+    /// Top-level folders expected under Documents. The nested dictionaries are currently always empty.
+    func directories() -> [String : [String : String]] {
+        [
+            "themes" : [:],
+            "amiibo" : [:],
+            "cache" : [:],
+            "config" : [:],
+            "crash_dumps" : [:],
+            "dump" : [:],
+            "keys" : [:],
+            "load" : [:],
+            "log" : [:],
+            "nand" : [:],
+            "play_time" : [:],
+            "roms" : [:],
+            "screenshots" : [:],
+            "sdmc" : [:],
+            "shader" : [:],
+            "tas" : [:],
+            "icons" : [:]
+        ]
+    }
+
+    /// Creates every folder from `directories()` that does not exist yet.
+    func createdirectories() throws {
+        let documentdir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
+        try directories().forEach() { directory, _ in
+            let directoryURL = documentdir.appendingPathComponent(directory)
+
+            if !FileManager.default.fileExists(atPath: directoryURL.path) {
+                print("creating dir at \(directoryURL.path)") // yippee
+                try FileManager.default.createDirectory(at: directoryURL, withIntermediateDirectories: false, attributes: nil)
+            }
+        }
+    }
+
+    /// Returns whether `keys/prod.keys` and `keys/title.keys` exist, in that order.
+    func DetectKeys() -> (Bool, Bool) {
+        let filemanager = FileManager.default
+        let documentdir = filemanager.urls(for: .documentDirectory, in: .userDomainMask)[0]
+        let KeysFolderURL = documentdir.appendingPathComponent("keys")
+        let prodkeys = filemanager.fileExists(atPath: KeysFolderURL.appendingPathComponent("prod.keys").path)
+        let titlekeys = filemanager.fileExists(atPath: KeysFolderURL.appendingPathComponent("title.keys").path)
+        return (prodkeys, titlekeys)
+    }
+}
+
+enum LibManError : Error {
+    case ripenum, urlgobyebye
+}
+
+/// Discovers game files under Documents/roms and Documents/sdmc and turns them into `EmulationGame`s.
+class LibraryManager {
+    static let shared = LibraryManager()
+    let documentdir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("roms", conformingTo: .folder)
+
+    /// Extensions treated as homebrew executables.
+    private static let homebrewExtensions: Set<String> = ["nso", "nro"]
+    /// Extensions accepted when scanning the roms folder for the library.
+    private static let libraryExtensions: Set<String> = ["nca", "nro", "nsp", "nso", "xci"]
+
+    /// Documents/sdmc.
+    private var sdmcDirectory: URL {
+        FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent("sdmc", conformingTo: .folder)
+    }
+
+    /// True when Documents/sdmc/switch exists; the sdmc folder is only scanned in that case.
+    private var hasSdmcSwitchFolder: Bool {
+        let sdfolder = sdmcDirectory.appendingPathComponent("switch", conformingTo: .folder)
+        return FileManager.default.fileExists(atPath: sdfolder.path)
+    }
+
+    /// Deletes the file backing `game`.
+    func removerom(_ game: EmulationGame) throws {
+        try FileManager.default.removeItem(at: game.fileURL)
+    }
+
+    /// Recursively collects the regular files below `directory` whose lowercased extension is in `allowed`.
+    ///
+    /// - Throws: `LibManError.ripenum` when the directory cannot be enumerated, or the
+    ///   underlying error when a file's resource values cannot be read.
+    private func romFiles(in directory: URL, withExtensions allowed: Set<String>) throws -> [URL] {
+        guard let enumerator = FileManager.default.enumerator(at: directory, includingPropertiesForKeys: [.isRegularFileKey], options: []) else {
+            print("uhoh how unfortunate for some reason FileManager.default.enumerator aint workin")
+            throw LibManError.ripenum
+        }
+        var found: [URL] = []
+        for case let file as URL in enumerator {
+            let values = try file.resourceValues(forKeys: [.isRegularFileKey])
+            if values.isRegularFile == true, allowed.contains(file.pathExtension.lowercased()) {
+                found.append(file)
+            }
+        }
+        return found
+    }
+
+    /// Builds one `EmulationGame` per URL from the metadata `AppUI` reports for it.
+    private func makeGames(from urls: [URL]) -> [EmulationGame] {
+        urls.map { url in
+            let iscustom = url.startAccessingSecurityScopedResource()
+            // Balance the security-scope access on every exit path.
+            defer {
+                if iscustom {
+                    url.stopAccessingSecurityScopedResource()
+                }
+            }
+            let information = AppUI.shared.information(for: url)
+            return EmulationGame(developer: information.developer, fileURL: url, imageData: information.iconData, title: information.title)
+        }
+    }
+
+    /// Returns the homebrew (`nso`/`nro`) files found under sdmc and roms.
+    ///
+    /// Each folder is scanned exactly once, so the result contains no duplicates.
+    /// A failed sdmc scan is logged and skipped; a failed roms scan yields `[]`.
+    func homebrewroms() -> [EmulationGame] {
+        var urls: [URL] = []
+        if hasSdmcSwitchFolder {
+            do {
+                urls += try romFiles(in: sdmcDirectory, withExtensions: LibraryManager.homebrewExtensions)
+            } catch {
+                print("Failed to scan sdmc for homebrew: \(error)")
+            }
+        }
+        do {
+            urls += try romFiles(in: documentdir, withExtensions: LibraryManager.homebrewExtensions)
+        } catch {
+            return []
+        }
+        return makeGames(from: urls)
+    }
+
+    /// Scans roms (all supported formats) and sdmc (homebrew only), registers the
+    /// files with `AppUI`, and returns them wrapped in a `Core`.
+    func library() throws -> Core {
+        var urls = try romFiles(in: documentdir, withExtensions: LibraryManager.libraryExtensions)
+        if hasSdmcSwitchFolder {
+            urls += try romFiles(in: sdmcDirectory, withExtensions: LibraryManager.homebrewExtensions)
+        }
+        // Register the files before their metadata is queried in makeGames(from:).
+        AppUI.shared.insert(games: urls)
+
+        let directory = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
+        var YuzuCore = Core(games: [], root: directory)
+        YuzuCore.games = makeGames(from: urls)
+        return YuzuCore
+    }
+}
diff --git a/src/ios/FolderMonitor.swift b/src/ios/FolderMonitor.swift
new file mode 100644
index 0000000000..ecae4c0857
--- /dev/null
+++ b/src/ios/FolderMonitor.swift
@@ -0,0 +1,49 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+//
SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+import Foundation
+
+/// Watches a folder through a `DispatchSource` file-system source and calls
+/// `onFolderChange` on the main queue whenever a `.write` event is delivered.
+class FolderMonitor {
+    private var folderDescriptor: Int32 = -1
+    private var folderMonitorSource: DispatchSourceFileSystemObject?
+    private let folderURL: URL
+    private let onFolderChange: () -> Void
+    /// Stores the URL and callback, then starts monitoring immediately.
+    init(folderURL: URL, onFolderChange: @escaping () -> Void) {
+        self.folderURL = folderURL
+        self.onFolderChange = onFolderChange
+        startMonitoring()
+    }
+    /// Opens the folder with `O_EVTONLY` and attaches a `.write` event source on a global queue.
+    /// If `open` fails, a message is printed and no source is created.
+    private func startMonitoring() {
+        folderDescriptor = open(folderURL.path, O_EVTONLY)
+        guard folderDescriptor != -1 else {
+            print("Failed to open folder descriptor.")
+            return
+        }
+
+        folderMonitorSource = DispatchSource.makeFileSystemObjectSource(
+            fileDescriptor: folderDescriptor,
+            eventMask: .write,
+            queue: DispatchQueue.global()
+        )
+        folderMonitorSource?.setEventHandler { [weak self] in
+            self?.folderDidChange()
+        }
+        // NOTE(review): unlike the event handler, this closure has no capture list and so
+        // captures `self` strongly while the source is stored on `self`. That looks like a
+        // retain cycle that would keep `deinit` (and thus `cancel()`/`close`) from running — confirm.
+        folderMonitorSource?.setCancelHandler {
+            close(self.folderDescriptor)
+        }
+        folderMonitorSource?.resume()
+    }
+
+    /// Event-handler body: logs the change and forwards to `onFolderChange` on the main queue.
+    private func folderDidChange() {
+        // Detect the change and call the refreshcore function
+        print("Folder changed! New file added or removed.")
+        DispatchQueue.main.async { [weak self] in
+            self?.onFolderChange()
+        }
+    }
+    // Cancelling the source triggers the cancel handler, which closes the descriptor.
+    deinit {
+        folderMonitorSource?.cancel()
+    }
+}
diff --git a/src/ios/GameButtonListView.swift b/src/ios/GameButtonListView.swift
new file mode 100644
index 0000000000..c58d329e72
--- /dev/null
+++ b/src/ios/GameButtonListView.swift
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: Copyright 2024 Pomelo, TechGuy
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+import SwiftUI
+import Foundation
+import UIKit
+
+/// List row for one game: a 60x60 icon (or the "photo" placeholder when the
+/// image data cannot be decoded) next to the title and developer.
+struct GameButtonListView: View {
+    var game: EmulationGame
+    @Environment(\.colorScheme) var colorScheme
+
+    var body: some View {
+        HStack(spacing: 15) {
+            if let image = UIImage(data: game.imageData) {
+                Image(uiImage: image)
+                    .resizable()
+                    .frame(width: 60, height: 60)
+                    .cornerRadius(8)
+            } else {
+                Image(systemName: "photo")
+                    .resizable()
+                    .frame(width: 60, height: 60)
+                    .cornerRadius(8)
+            }
+
+            VStack(alignment: .leading, spacing: 4) {
+                Text(game.title)
+                    .font(.headline)
+                    .foregroundColor(colorScheme == .dark ? Color.white : Color.black)
+                Text(game.developer)
+                    .font(.subheadline)
+                    .foregroundColor(.gray)
+            }
+            Spacer()
+        }
+        .padding(.vertical, 8)
+    }
+}
diff --git a/src/ios/GameButtonView.swift b/src/ios/GameButtonView.swift
new file mode 100644
index 0000000000..fae44544e7
--- /dev/null
+++ b/src/ios/GameButtonView.swift
@@ -0,0 +1,182 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+import SwiftUI
+import Foundation
+import UIKit
+import UniformTypeIdentifiers
+import Combine
+
+/// Carousel tile for one game. The first tap selects the game; tapping the
+/// already-selected tile sets `startgame`, which activates the hidden
+/// `NavigationLink` to `EmulationView`.
+struct GameIconView: View {
+    var game: EmulationGame
+    @Binding var selectedGame: EmulationGame?
+    @State var startgame: Bool = false
+    // Not read or written anywhere in this view.
+    @State var timesTapped: Int = 0
+
+    var isSelected: Bool {
+        selectedGame == game
+    }
+
+    var body: some View {
+        NavigationLink(
+            destination: EmulationView(game: game).toolbar(.hidden, for: .tabBar),
+            isActive: $startgame,
+            label: {
+                EmptyView()
+            }
+        )
+        VStack(spacing: 5) {
+            if isSelected {
+                Text(game.title)
+                    .foregroundColor(.blue)
+                    .font(.title2)
+            }
+            if let uiImage = UIImage(data: game.imageData) {
+                Image(uiImage: uiImage)
+                    .resizable()
+                    .scaledToFit()
+                    .frame(width: isSelected ? 200 : 180, height: isSelected ? 200 : 180)
+                    .cornerRadius(10)
+                    .overlay(
+                        isSelected ? RoundedRectangle(cornerRadius: 10)
+                            .stroke(Color.blue, lineWidth: 5)
+                        : nil
+                    )
+                    .onTapGesture {
+                        if isSelected {
+                            startgame = true
+                            print(isSelected)
+                        }
+                        if !isSelected {
+                            selectedGame = game
+                        }
+                    }
+            } else {
+                // Placeholder tile: tapping only selects; it never launches the game.
+                Image(systemName: "questionmark")
+                    .resizable()
+                    .scaledToFit()
+                    .frame(width: 200, height: 200)
+                    .cornerRadius(10)
+                    .onTapGesture { selectedGame = game }
+            }
+        }
+        .frame(width: 200, height: 250)
+    }
+}
+
+/// Row of four circular buttons. Only the "gearshape" entry is wired up (it
+/// navigates to `SettingsView`); the other three have empty actions.
+struct BottomMenuView: View {
+    @State var core: Core
+    var body: some View {
+        HStack(spacing: 40) {
+            Button {
+
+            } label: {
+                Circle()
+                    .overlay {
+                        Image(systemName: "message").font(.system(size: 30)).foregroundColor(.red)
+                    }
+                    .frame(width: 50, height: 50)
+                    .foregroundColor(Color.init(uiColor: .lightGray))
+            }
+            Button {
+
+            } label: {
+                Circle()
+                    .overlay {
+                        Image(systemName: "photo").font(.system(size: 30)).foregroundColor(.blue)
+                    }
+                    .frame(width: 50, height: 50)
+                    .foregroundColor(Color.init(uiColor: .lightGray))
+            }
+            NavigationLink(destination: SettingsView(core: core)) {
+                Circle()
+                    .overlay {
+                        Image(systemName: "gearshape").foregroundColor(Color.init(uiColor: .darkGray)).font(.system(size: 30))
+                    }
+                    .frame(width: 50, height: 50)
+                    .foregroundColor(Color.init(uiColor: .lightGray))
+            }
+
+            Button {
+
+            } label: {
+                Circle()
+                    .overlay {
+                        Image(systemName: "power").foregroundColor(Color.init(uiColor: .darkGray)).font(.system(size: 30))
+                    }
+                    .frame(width: 50, height: 50)
+                    .foregroundColor(Color.init(uiColor: .lightGray))
+            }
+        }
+        .padding(.bottom, 20)
+    }
+}
+
+/// Home screen: the game carousel above the bottom menu. Reloads the library
+/// on appear and creates a `FolderMonitor` for `Documents/roms`.
+struct HomeView: View {
+    @State private var selectedGame: EmulationGame? = nil
+
+    @State var core: Core
+
+    init(selectedGame: EmulationGame? = nil, core: Core) {
+        _core = State(wrappedValue: core)
+        // NOTE(review): this assigns through the `@State` wrapper inside `init`, and
+        // `refreshcore()` does the same for `core`; presumably neither takes effect
+        // before the view is installed — confirm.
+        self.selectedGame = selectedGame
+        refreshcore()
+    }
+
+    var body: some View {
+        NavigationStack {
+            GeometryReader { geometry in
+                VStack {
+                    GameCarouselView(core: core, selectedGame: $selectedGame)
+                    Spacer()
+                    BottomMenuView(core: core)
+                }
+            }
+        }
+        .background(Color.gray.opacity(0.1))
+        .edgesIgnoringSafeArea(.all)
+        .onAppear {
+            refreshcore()
+            if let documentsDirectory = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first {
+                let romsFolderURL = documentsDirectory.appendingPathComponent("roms")
+                // NOTE(review): `folderMonitor` is a local that is never stored, and a new
+                // one is created on every appearance. Whether it outlives this closure
+                // depends on `FolderMonitor` keeping itself alive — confirm intended lifetime.
+                let folderMonitor = FolderMonitor(folderURL: romsFolderURL) {
+                    do {
+                        core = Core(games: [], root: documentsDirectory)
+                        core = try LibraryManager.shared.library()
+                    } catch {
+                        print("Error refreshing core: \(error)")
+                    }
+                }
+            }
+        }
+    }
+
+    /// Reloads `core` from `LibraryManager`; on failure the error is printed and `core` is left as-is.
+    func refreshcore() {
+        print("Loading library...")
+        do {
+            core = try LibraryManager.shared.library()
+            print(core.games)
+        } catch {
+            print("Failed to fetch library: \(error)")
+            return
+        }
+    }
+}
+
+
+/// Horizontally scrolling strip of `GameIconView` tiles, one per game in `core`.
+struct GameCarouselView: View {
+    // let games: [EmulationGame]
+    @State var core: Core
+    @Binding var selectedGame: EmulationGame?
+    var body: some View {
+        ScrollView(.horizontal, showsIndicators: false) {
+            HStack(spacing: 20) {
+                ForEach(core.games) { game in
+                    GameIconView(game: game, selectedGame: $selectedGame)
+                }
+            }
+        }
+    }
+}
diff --git a/src/ios/GameListView.swift b/src/ios/GameListView.swift
new file mode 100644
index 0000000000..1c8d39e780
--- /dev/null
+++ b/src/ios/GameListView.swift
@@ -0,0 +1,140 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+import SwiftUI
+import Foundation
+import UIKit
+import UniformTypeIdentifiers
+import AppUI
+
+/// Searchable game list that can switch between a grid and a list layout via `isGridView`.
+struct GameListView: View {
+    @State var core: Core
+    @State private var searchText = ""
+    @State var game: Int = 1
+    @State var startgame: Bool = false
+    @Binding var isGridView: Bool
+    @State var showAlert = false
+    @State var alertMessage: Alert? = nil
+
+    var body: some View {
+        // Keeps games whose title contains the search text (case-insensitive); an empty search keeps all.
+        let filteredGames = core.games.filter { game in
+            guard let EmulationGame = game as? PoYuzume else { return false }
+            return searchText.isEmpty || EmulationGame.title.localizedCaseInsensitiveContains(searchText)
+        }
+
+        ScrollView {
+            VStack {
+                VStack(alignment: .leading) {
+
+                    if isGridView {
+                        LazyVGrid(columns: [GridItem(.adaptive(minimum: 160))], spacing: 10) {
+                            // NOTE(review): the text between this `ForEach(` and the `guard` below appears
+                            // to be truncated in this copy of the patch; it is reproduced unchanged.
+                            ForEach(0.. String {
+        guard let s = infoDictionary?["CFBundleShortVersionString"] as? String else {
+            return "Unknown"
+        }
+        return s
+    }
+}
diff --git a/src/ios/JoystickView.swift b/src/ios/JoystickView.swift
new file mode 100644
index 0000000000..24ce8e4739
--- /dev/null
+++ b/src/ios/JoystickView.swift
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+import SwiftUI
+import SwiftUIJoystick
+import AppUI
+
+/// On-screen thumbstick that forwards its position to `AppUI.thumbstickMoved`.
+/// A non-nil `iscool` routes it to the right analog stick, otherwise the left.
+public struct Joystick: View {
+    @State var iscool: Bool? = nil
+    // Controller id passed to AppUI: 8 when the "onscreenhandheld" setting is on, else 0.
+    var id: Int {
+        if onscreenjoy {
+            return 8
+        }
+        return 0
+    }
+    @AppStorage("onscreenhandheld") var onscreenjoy: Bool = false
+
+    let appui = AppUI.shared
+
+    @ObservedObject public var joystickMonitor = JoystickMonitor()
+    private let dragDiameter: CGFloat = 160
+    // Declared but not used below; the builder is given `.circle` directly.
+    private let shape: JoystickShape = .circle
+
+    public var body: some View {
+        VStack{
+            JoystickBuilder(
+                monitor: self.joystickMonitor,
+                width: self.dragDiameter,
+                shape: .circle,
+                background: {
+                    // Example Background
+                    RoundedRectangle(cornerRadius: 8).fill(Color.gray.opacity(0))
+                },
+                foreground: {
+                    // Example Thumb
+                    Circle().fill(Color.gray)
+                },
+                locksInPlace: false)
+            .onChange(of: self.joystickMonitor.xyPoint) { newValue in
+                let scaledX = Float(newValue.x)
+                let scaledY = Float(-newValue.y) // my dumbass broke this by having -y instead of y :/ (well it appears that with the new joystick code, its supposed to be -y)
+                // NOTE(review): this only reads the publisher and discards it; nothing is sent.
+                joystickMonitor.objectWillChange
+                print("Joystick Position: (\(scaledX), \(scaledY))")
+
+                if iscool != nil {
+                    appui.thumbstickMoved(analog: .right, x: scaledX, y: scaledY, controllerid: id)
+                } else {
+                    appui.thumbstickMoved(analog: .left, x: scaledX, y: scaledY, controllerid: id)
+                }
+            }
+        }
+    }
+}
diff --git a/src/ios/KeyboardHostingController.swift b/src/ios/KeyboardHostingController.swift
new file mode 100644
index 0000000000..85e1cd01d0
--- /dev/null
+++ b/src/ios/KeyboardHostingController.swift
@@ -0,0
@@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import UIKit + +class KeyboardHostingController: UIHostingController { + + override var canBecomeFirstResponder: Bool { + return true + } + + override func viewDidLoad() { + super.viewDidLoad() + becomeFirstResponder() // Make sure the view can become the first responder + } + + override var keyCommands: [UIKeyCommand]? { + return [ + UIKeyCommand(input: UIKeyCommand.inputUpArrow, modifierFlags: [], action: #selector(handleKeyCommand)), + UIKeyCommand(input: UIKeyCommand.inputDownArrow, modifierFlags: [], action: #selector(handleKeyCommand)), + UIKeyCommand(input: UIKeyCommand.inputLeftArrow, modifierFlags: [], action: #selector(handleKeyCommand)), + UIKeyCommand(input: UIKeyCommand.inputRightArrow, modifierFlags: [], action: #selector(handleKeyCommand)), + UIKeyCommand(input: "w", modifierFlags: [], action: #selector(handleKeyCommand)), + UIKeyCommand(input: "s", modifierFlags: [], action: #selector(handleKeyCommand)), + UIKeyCommand(input: "a", modifierFlags: [], action: #selector(handleKeyCommand)), + UIKeyCommand(input: "d", modifierFlags: [], action: #selector(handleKeyCommand)) + ] + } + + @objc func handleKeyCommand(_ sender: UIKeyCommand) { + if let input = sender.input { + switch input { + case UIKeyCommand.inputUpArrow: + print("Up Arrow Pressed") + case UIKeyCommand.inputDownArrow: + print("Down Arrow Pressed") + case UIKeyCommand.inputLeftArrow: + print("Left Arrow Pressed") + case UIKeyCommand.inputRightArrow: + print("Right Arrow Pressed") + case "w": + print("W Key Pressed") + case "s": + print("S Key Pressed") + case "a": + print("A Key Pressed") + case "d": + print("D Key Pressed") + default: + break + } + } + } +} + + +struct KeyboardSupportView: UIViewControllerRepresentable { + let content: Text + 
+ func makeUIViewController(context: Context) -> KeyboardHostingController { + return KeyboardHostingController(rootView: content) + } + + func updateUIViewController(_ uiViewController: KeyboardHostingController, context: Context) { + // Handle any updates needed + } +} + +struct KeyboardView: View { + var body: some View { + KeyboardSupportView(content: Text("")) + } +} diff --git a/src/ios/LibraryView.swift b/src/ios/LibraryView.swift new file mode 100644 index 0000000000..c85b39608d --- /dev/null +++ b/src/ios/LibraryView.swift @@ -0,0 +1,191 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import CryptoKit +import AppUI + +struct LibraryView: View { + @Binding var core: Core + @State var isGridView: Bool = true + @State var doesitexist = (false, false) + @State var importedgame: EmulationGame? = nil + @State var importgame: Bool = false + @State var isimportingfirm: Bool = false + @State var launchGame: Bool = false + var body: some View { + NavigationStack { + if let importedgame = importedgame { + NavigationLink( + isActive: $launchGame, + destination: { + EmulationView(game: importedgame).toolbar(.hidden, for: .tabBar) + }, + label: { + EmptyView() // This keeps the link hidden + } + ) + } + + VStack { + if doesitexist.0, doesitexist.1 { + HomeView(core: core) + } else { + let (doesKeyExist, doesProdExist) = doeskeysexist() + ScrollView { + Text("You Are Missing These Files:") + .font(.headline) + .foregroundColor(.red) + HStack { + if !doesProdExist { + Text("Prod.keys") + .font(.subheadline) + .foregroundColor(.red) + } + if !doesKeyExist { + Text("Title.keys") + .font(.subheadline) + .foregroundColor(.red) + } + } + Text("These goes into the Keys folder") + .font(.caption) + .foregroundColor(.red) + .padding(.bottom) + + if 
!LibraryManager.shared.homebrewroms().isEmpty { + Text("Homebrew Roms:") + .font(.headline) + LazyVGrid(columns: [GridItem(.adaptive(minimum: 160))], spacing: 10) { + ForEach(LibraryManager.shared.homebrewroms()) { game in + NavigationLink(destination: EmulationView(game: game).toolbar(.hidden, for: .tabBar)) { + // GameButtonView(game: game) + // .frame(maxWidth: .infinity, minHeight: 200) + } + .contextMenu { + NavigationLink(destination: EmulationView(game: game)) { + Text("Launch") + } + } + } + } + } + } + .refreshable { + doesitexist = doeskeysexist() + } + + + } + + } + .fileImporter(isPresented: $isimportingfirm, allowedContentTypes: [.zip], onCompletion: { result in + switch result { + case .success(let elements): + core.AddFirmware(at: elements) + case .failure(let error): + + print(error.localizedDescription) + } + }) + .fileImporter(isPresented: $importgame, allowedContentTypes: [.item], onCompletion: { result in + switch result { + case .success(let elements): + let iscustom = elements.startAccessingSecurityScopedResource() + let information = AppUI.shared.information(for: elements) + + let game = EmulationGame(developer: information.developer, fileURL: elements, + imageData: information.iconData, + title: information.title) + + importedgame = game + + + DispatchQueue.main.async { + + if iscustom { + elements.stopAccessingSecurityScopedResource() + } + + launchGame = true + } + case .failure(let error): + + print(error.localizedDescription) + } + }) + .onAppear() { + doesitexist = doeskeysexist() + } + .navigationBarTitle("Library", displayMode: .inline) + .toolbar { + ToolbarItem(placement: .navigationBarLeading) { // why did this take me so long to figure out lmfao + Button(action: { + isGridView.toggle() + }) { + Image(systemName: isGridView ? 
"rectangle.grid.1x2" : "square.grid.2x2") + .imageScale(.large) + .padding() + } + } + + ToolbarItem(placement: .navigationBarTrailing) { // funsies + Menu { + Button(action: { + importgame = true // this part took a while + + }) { + Text("Launch Game") + } + + Button(action: { + isimportingfirm = true + }) { + Text("Import Firmware") + } + } label: { + Image(systemName: "plus.circle.fill") + .imageScale(.large) + .padding() + } + + } + } + } + } + + + func doeskeysexist() -> (Bool, Bool) { + var doesprodexist = false + var doestitleexist = false + + + let title = core.root.appendingPathComponent("keys").appendingPathComponent("title.keys") + let prod = core.root.appendingPathComponent("keys").appendingPathComponent("prod.keys") + let fileManager = FileManager.default + let documentsDirectory = fileManager.urls(for: .documentDirectory, in: .userDomainMask)[0] + + if fileManager.fileExists(atPath: prod.path) { + doesprodexist = true + } else { + print("File does not exist") + } + + if fileManager.fileExists(atPath: title.path) { + doestitleexist = true + } else { + print("File does not exist") + } + + return (doestitleexist, doesprodexist) + } +} + +func getDeveloperNames() -> String { + guard let s = infoDictionary?["CFBundleIdentifier"] as? String else { + return "Unknown" + } + return s +} diff --git a/src/ios/MetalView.swift b/src/ios/MetalView.swift new file mode 100644 index 0000000000..c6a746e9f2 --- /dev/null +++ b/src/ios/MetalView.swift @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import Metal +import AppUI + +struct MetalView: UIViewRepresentable { + let device: MTLDevice? 
+ let configure: (UIView) -> Void + + func makeUIView(context: Context) -> EmulationScreenView { + let view = EmulationScreenView() + configure(view.primaryScreen) + return view + } + + func updateUIView(_ uiView: EmulationScreenView, context: Context) { + // + } +} diff --git a/src/ios/NavView.swift b/src/ios/NavView.swift new file mode 100644 index 0000000000..b989dd5925 --- /dev/null +++ b/src/ios/NavView.swift @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI +import AppUI + +struct NavView: View { + @Binding var core: Core + @State private var selectedTab = 0 + var body: some View { + TabView(selection: $selectedTab) { + LibraryView(core: $core) + .tabItem { Label("Library", systemImage: "rectangle.on.rectangle") } + .tag(0) + BootOSView(core: $core, currentnavigarion: $selectedTab) + .toolbar(.hidden, for: .tabBar) + .tabItem { Label("Boot OS", systemImage: "house") } + .tag(1) + SettingsView(core: core) + .tabItem { Label("Settings", systemImage: "gear") } + .tag(2) + } + } +} diff --git a/src/ios/PomeloApp.swift b/src/ios/PomeloApp.swift new file mode 100644 index 0000000000..4a48939f54 --- /dev/null +++ b/src/ios/PomeloApp.swift @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11 +// SPDX-License-Identifier: GPL-3.0-or-later + +import SwiftUI + +infix operator --: LogicalDisjunctionPrecedence + +func --(lhs: Bool, rhs: Bool) -> Bool { + return lhs || rhs +} + +@main +struct PomeloApp: App { + var body: some Scene { + WindowGroup { ContentView() } + } +} diff --git a/src/ios/SettingsView.swift b/src/ios/SettingsView.swift new file mode 100644 index 0000000000..dca90e7574 --- /dev/null +++ b/src/ios/SettingsView.swift @@ -0,0 
+1,18 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: Copyright 2024 Pomelo, Stossy11
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+import SwiftUI
+
+/// Settings screen. In this revision the body is an empty `NavigationStack`.
+struct SettingsView: View {
+    @State var core: Core
+    @State var showprompt = false
+
+    // Persisted under the "icon" key; not read anywhere in this view yet.
+    @AppStorage("icon") var iconused = 1
+    var body: some View {
+        NavigationStack {
+
+        }
+    }
+}
diff --git a/src/ios/VMA.cpp b/src/ios/VMA.cpp
new file mode 100644
index 0000000000..7e2ea49c28
--- /dev/null
+++ b/src/ios/VMA.cpp
@@ -0,0 +1,5 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#define VMA_IMPLEMENTATION
+#include "video_core/vulkan_common/vma.h"
diff --git a/src/qt_common/CMakeLists.txt b/src/qt_common/CMakeLists.txt index 904b03d288..399fbe67a0 100644 --- a/src/qt_common/CMakeLists.txt +++ b/src/qt_common/CMakeLists.txt @@ -50,7 +50,6 @@ if (USE_DISCORD_PRESENCE) if (YUZU_USE_BUNDLED_OPENSSL) target_link_libraries(qt_common PUBLIC OpenSSL::SSL OpenSSL::Crypto) - target_compile_definitions(qt_common PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT) endif() target_compile_definitions(qt_common PUBLIC USE_DISCORD_PRESENCE) diff --git a/src/qt_common/discord/discord_impl.cpp b/src/qt_common/discord/discord_impl.cpp index 37b24cdd57..c1cb5319dc 100644 --- a/src/qt_common/discord/discord_impl.cpp +++ b/src/qt_common/discord/discord_impl.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include "common/httplib.h" #include #include diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 362b068656..3a572eb875 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -369,7 +369,6 @@ else() else() target_compile_options(video_core PRIVATE $<$:-Werror=conversion>) endif() - target_compile_options(video_core PRIVATE $<$:-Wno-sign-conversion>) # xbyak diff --git a/src/video_core/vulkan_common/vma.h b/src/video_core/vulkan_common/vma.h
index e022b2bf7d..514ff4501c 100644 --- a/src/video_core/vulkan_common/vma.h +++ b/src/video_core/vulkan_common/vma.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -13,9 +13,14 @@ #ifdef _MSC_VER #pragma warning( push ) #pragma warning( disable : 4189 ) +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" #endif #include "vk_mem_alloc.h" #ifdef _MSC_VER #pragma warning( pop ) +#elif defined(__clang__) +#pragma clang diagnostic pop #endif diff --git a/src/web_service/CMakeLists.txt b/src/web_service/CMakeLists.txt index 0dedad16f7..a3acdfcf0f 100644 --- a/src/web_service/CMakeLists.txt +++ b/src/web_service/CMakeLists.txt @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + # SPDX-FileCopyrightText: 2018 yuzu Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index 60b11fff5a..1e71406989 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -16,7 +16,7 @@ #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #endif #endif -#include +#include "common/httplib.h" #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 1ed1fdff2a..d94487ec79 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -369,7 +369,7 @@ if (APPLE) if (CMAKE_GENERATOR MATCHES "Xcode") set(_icons "${_dist}/eden.icon") - set_target_properties(eden PROPERTIES + set_target_properties(yuzu PROPERTIES XCODE_ATTRIBUTE_ASSETCATALOG_COMPILER_APPICON_NAME eden MACOSX_BUNDLE_ICON_FILE eden # Also force xcode to manage signing 
for us. diff --git a/tools/cpm/package/fetch.sh b/tools/cpm/package/fetch.sh index b03a824146..54ea8c64c4 100755 --- a/tools/cpm/package/fetch.sh +++ b/tools/cpm/package/fetch.sh @@ -83,7 +83,7 @@ ci_package() { android-aarch64 android-x86_64 \ solaris-amd64 freebsd-amd64 openbsd-amd64 \ linux-amd64 linux-aarch64 \ - macos-universal; do + macos-universal ios-aarch64; do echo "-- * platform $platform" case $DISABLED in diff --git a/tools/cpm/package/util/interactive.sh b/tools/cpm/package/util/interactive.sh index 99db77e20d..07087e5d10 100755 --- a/tools/cpm/package/util/interactive.sh +++ b/tools/cpm/package/util/interactive.sh @@ -151,7 +151,7 @@ mingw-amd64 mingw-arm64 android-aarch64 android-x86_64 solaris-amd64 freebsd-amd64 openbsd-amd64 linux-amd64 linux-aarch64 -macos-universal" +macos-universal ios-aarch64" DISABLED_PLATFORMS="$reply" fi