diff --git a/.github/ISSUE_TEMPLATE/blank_issue_template.yml b/.forgejo/ISSUE_TEMPLATE/blank_issue_template.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/blank_issue_template.yml rename to .forgejo/ISSUE_TEMPLATE/blank_issue_template.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.forgejo/ISSUE_TEMPLATE/bug_report.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/bug_report.yml rename to .forgejo/ISSUE_TEMPLATE/bug_report.yml diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.forgejo/ISSUE_TEMPLATE/config.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/config.yml rename to .forgejo/ISSUE_TEMPLATE/config.yml diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.forgejo/ISSUE_TEMPLATE/feature_request.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/feature_request.yml rename to .forgejo/ISSUE_TEMPLATE/feature_request.yml diff --git a/.github/workflows/license-header.yml b/.forgejo/workflows/license-header.yml similarity index 100% rename from .github/workflows/license-header.yml rename to .forgejo/workflows/license-header.yml diff --git a/.github/workflows/sources.yml b/.forgejo/workflows/sources.yml similarity index 100% rename from .github/workflows/sources.yml rename to .forgejo/workflows/sources.yml diff --git a/.github/workflows/strings.yml b/.forgejo/workflows/strings.yml similarity index 100% rename from .github/workflows/strings.yml rename to .forgejo/workflows/strings.yml diff --git a/.github/workflows/translations.yml b/.forgejo/workflows/translations.yml similarity index 97% rename from .github/workflows/translations.yml rename to .forgejo/workflows/translations.yml index 92bb1fdf5d..16ce4f1808 100644 --- a/.github/workflows/translations.yml +++ b/.forgejo/workflows/translations.yml @@ -3,8 +3,7 @@ name: tx-pull on: # monday, wednesday, saturday at 2pm schedule: - cron: - - '0 14 * * 1,3,6' + cron: '0 14 * * 1,3,6' workflow_dispatch: jobs: @@ -59,4 +58,3 @@ jobs: -H 'Authorization: Bearer ${{ 
secrets.CI_FJ_TOKEN }}' \ -H 'Content-Type: application/json' \ -d "@data.json" --fail - diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 5b0adad8dd..947a4963ee 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -37,10 +37,10 @@ set(GIT_DESC ${BUILD_VERSION}) # Auto-updater metadata! Must somewhat mirror GitHub API endpoint if (NIGHTLY_BUILD) - set(BUILD_AUTO_UPDATE_WEBSITE "https://github.com") - set(BUILD_AUTO_UPDATE_API "api.github.com") - set(BUILD_AUTO_UPDATE_API_PATH "/repos/") - set(BUILD_AUTO_UPDATE_REPO "Eden-CI/Nightly") + set(BUILD_AUTO_UPDATE_WEBSITE "https://git.eden-emu.dev") + set(BUILD_AUTO_UPDATE_API "git.eden-emu.dev") + set(BUILD_AUTO_UPDATE_API_PATH "/api/v1/repos/") + set(BUILD_AUTO_UPDATE_REPO "eden-ci/nightly") set(REPO_NAME "Eden Nightly") else() set(BUILD_AUTO_UPDATE_WEBSITE "https://git.eden-emu.dev") diff --git a/dist/dev.eden_emu.eden.svg b/dist/dev.eden_emu.eden.svg index f88b52f625..7711945aa4 100644 --- a/dist/dev.eden_emu.eden.svg +++ b/dist/dev.eden_emu.eden.svg @@ -1,203 +1,21 @@ + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + inkscape:current-layer="layer1" /> + + + + + + + + + + + + + + + diff --git a/dist/eden.bmp b/dist/eden.bmp index 888138ccf7..cffc04b308 100644 Binary files a/dist/eden.bmp and b/dist/eden.bmp differ diff --git a/dist/eden.ico b/dist/eden.ico index 45120ef312..106742c9ba 100644 Binary files a/dist/eden.ico and b/dist/eden.ico differ diff --git a/dist/icon_variations/aprilfools2026.svg b/dist/icon_variations/aprilfools2026.svg new file mode 100644 index 0000000000..7711945aa4 --- /dev/null +++ b/dist/icon_variations/aprilfools2026.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/dist/icon_variations/aprilfools2026_bgcolor b/dist/icon_variations/aprilfools2026_bgcolor new file mode 100644 index 0000000000..fabebfa717 --- /dev/null +++ 
b/dist/icon_variations/aprilfools2026_bgcolor @@ -0,0 +1 @@ +#43fcfcff diff --git a/dist/qt_themes/default/icons/256x256/eden.png b/dist/qt_themes/default/icons/256x256/eden.png index 3c4bd566a1..d7286ac4c6 100644 Binary files a/dist/qt_themes/default/icons/256x256/eden.png and b/dist/qt_themes/default/icons/256x256/eden.png differ diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 00bdf10a4f..ba0545b7a7 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -62,6 +62,12 @@ endif() # unordered_dense AddJsonPackage(unordered-dense) +# httplib +if (IOS) + set(HTTPLIB_USE_BROTLI_IF_AVAILABLE OFF) +endif() +AddJsonPackage(httplib) + if (YUZU_STATIC_ROOM) return() endif() @@ -227,9 +233,6 @@ if (VulkanMemoryAllocator_ADDED) endif() endif() -# httplib -AddJsonPackage(httplib) - # cpp-jwt if (ENABLE_WEB_SERVICE OR ENABLE_UPDATE_CHECKER) AddJsonPackage(cpp-jwt) diff --git a/externals/cpmfile.json b/externals/cpmfile.json index f849426a4d..03303a5896 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ -36,7 +36,8 @@ "0002-fix-zstd.patch" ], "options": [ - "HTTPLIB_REQUIRE_OPENSSL ON" + "HTTPLIB_REQUIRE_OPENSSL ON", + "HTTPLIB_DISABLE_MACOSX_AUTOMATIC_ROOT_CERTIFICATES ON" ] }, "cpp-jwt": { diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt index 2764d7eac6..44290fd4b6 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt @@ -25,6 +25,11 @@ import android.hardware.SensorEventListener import android.hardware.SensorManager import android.os.Build import android.os.Bundle +import android.os.Handler +import android.os.Looper +import androidx.navigation.NavOptions +import org.yuzu.yuzu_emu.fragments.EmulationFragment +import org.yuzu.yuzu_emu.utils.CustomSettingsHandler import 
android.util.Rational import android.view.InputDevice import android.view.KeyEvent @@ -87,6 +92,28 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager private val emulationViewModel: EmulationViewModel by viewModels() private var foregroundService: Intent? = null + private val mainHandler = Handler(Looper.getMainLooper()) + private var pendingRomSwapIntent: Intent? = null + private var isWaitingForRomSwapStop = false + private var romSwapNativeStopped = false + private var romSwapThreadStopped = false + private var romSwapGeneration = 0 + private var hasEmulationSession = processHasEmulationSession + private val romSwapStopTimeoutRunnable = Runnable { onRomSwapStopTimeout() } + + private fun onRomSwapStopTimeout() { + if (!isWaitingForRomSwapStop) { + return + } + Log.warning("[EmulationActivity] ROM swap stop timed out; retrying native stop and continuing to wait") + NativeLibrary.stopEmulation() + scheduleRomSwapStopTimeout() + } + + private fun scheduleRomSwapStopTimeout() { + mainHandler.removeCallbacks(romSwapStopTimeoutRunnable) + mainHandler.postDelayed(romSwapStopTimeoutRunnable, ROM_SWAP_STOP_TIMEOUT_MS) + } override fun attachBaseContext(base: Context) { super.attachBaseContext(YuzuApplication.applyLanguage(base)) @@ -128,9 +155,29 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager binding = ActivityEmulationBinding.inflate(layoutInflater) setContentView(binding.root) + val launchIntent = Intent(intent) + val shouldDeferLaunchForSwap = hasEmulationSession && isSwapIntent(launchIntent) + if (shouldDeferLaunchForSwap) { + Log.info("[EmulationActivity] onCreate detected existing session; deferring new game setup for swap") + emulationViewModel.setIsEmulationStopping(true) + emulationViewModel.setEmulationStopped(false) + } + val navHostFragment = supportFragmentManager.findFragmentById(R.id.fragment_container) as NavHostFragment - 
navHostFragment.navController.setGraph(R.navigation.emulation_navigation, intent.extras) + val initialArgs = if (shouldDeferLaunchForSwap) { + Bundle(intent.extras ?: Bundle()).apply { + processSessionGame?.let { putParcelable("game", it) } + } + } else { + intent.extras + } + navHostFragment.navController.setGraph(R.navigation.emulation_navigation, initialArgs) + if (shouldDeferLaunchForSwap) { + mainHandler.post { + handleSwapIntent(launchIntent) + } + } isActivityRecreated = savedInstanceState != null @@ -210,6 +257,7 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager } override fun onDestroy() { + mainHandler.removeCallbacks(romSwapStopTimeoutRunnable) super.onDestroy() inputManager.unregisterInputDeviceListener(this) stopForegroundService(this) @@ -228,17 +276,123 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager override fun onNewIntent(intent: Intent) { super.onNewIntent(intent) - setIntent(intent) - - // Reset navigation graph with new intent data to recreate EmulationFragment - val navHostFragment = - supportFragmentManager.findFragmentById(R.id.fragment_container) as NavHostFragment - navHostFragment.navController.setGraph(R.navigation.emulation_navigation, intent.extras) - + handleSwapIntent(intent) nfcReader.onNewIntent(intent) InputHandler.updateControllerData() } + private fun isSwapIntent(intent: Intent): Boolean { + return when { + intent.getBooleanExtra(EXTRA_OVERLAY_GAMELESS_EDIT_MODE, false) -> false + intent.action == CustomSettingsHandler.CUSTOM_CONFIG_ACTION -> true + intent.data != null -> true + else -> { + val extras = intent.extras + extras != null && + BundleCompat.getParcelable(extras, EXTRA_SELECTED_GAME, Game::class.java) != null + } + } + } + + private fun handleSwapIntent(intent: Intent) { + if (!isSwapIntent(intent)) { + return + } + + pendingRomSwapIntent = Intent(intent) + + if (!isWaitingForRomSwapStop) { + Log.info("[EmulationActivity] Begin ROM swap: 
data=${intent.data}") + isWaitingForRomSwapStop = true + romSwapNativeStopped = false + romSwapThreadStopped = false + romSwapGeneration += 1 + val thisSwapGeneration = romSwapGeneration + emulationViewModel.setIsEmulationStopping(true) + emulationViewModel.setEmulationStopped(false) + val navHostFragment = + supportFragmentManager.findFragmentById(R.id.fragment_container) as? NavHostFragment + val childFragmentManager = navHostFragment?.childFragmentManager + val stoppingFragmentForSwap = + (childFragmentManager?.primaryNavigationFragment as? EmulationFragment) ?: + childFragmentManager + ?.fragments + ?.asReversed() + ?.firstOrNull { + it is EmulationFragment && + it.isAdded && + it.view != null && + !it.isRemoving + } as? EmulationFragment + + val hasSessionForSwap = hasEmulationSession || stoppingFragmentForSwap != null + + if (!hasSessionForSwap) { + romSwapNativeStopped = true + romSwapThreadStopped = true + } else { + if (stoppingFragmentForSwap != null) { + stoppingFragmentForSwap.stopForRomSwap() + stoppingFragmentForSwap.notifyWhenEmulationThreadStops { + if (!isWaitingForRomSwapStop || romSwapGeneration != thisSwapGeneration) { + return@notifyWhenEmulationThreadStops + } + romSwapThreadStopped = true + Log.info("[EmulationActivity] ROM swap thread stop acknowledged") + launchPendingRomSwap(force = false) + } + } else { + Log.warning("[EmulationActivity] ROM swap stop target fragment not found; requesting native stop") + romSwapThreadStopped = true + NativeLibrary.stopEmulation() + } + + scheduleRomSwapStopTimeout() + } + } + + launchPendingRomSwap(force = false) + } + + private fun launchPendingRomSwap(force: Boolean) { + if (!isWaitingForRomSwapStop) { + return + } + if (!force && (!romSwapNativeStopped || !romSwapThreadStopped)) { + return + } + val swapIntent = pendingRomSwapIntent ?: return + Log.info("[EmulationActivity] Launching pending ROM swap: data=${swapIntent.data}") + pendingRomSwapIntent = null + isWaitingForRomSwapStop = false + 
romSwapNativeStopped = false + romSwapThreadStopped = false + mainHandler.removeCallbacks(romSwapStopTimeoutRunnable) + applyGameLaunchIntent(swapIntent) + } + + private fun applyGameLaunchIntent(intent: Intent) { + hasEmulationSession = true + processHasEmulationSession = true + emulationViewModel.setIsEmulationStopping(false) + emulationViewModel.setEmulationStopped(false) + setIntent(Intent(intent)) + val navHostFragment = + supportFragmentManager.findFragmentById(R.id.fragment_container) as NavHostFragment + val navController = navHostFragment.navController + val startArgs = intent.extras?.let { Bundle(it) } ?: Bundle() + val navOptions = NavOptions.Builder() + .setPopUpTo(R.id.emulationFragment, true) + .build() + + runCatching { + navController.navigate(R.id.emulationFragment, startArgs, navOptions) + }.onFailure { + Log.warning("[EmulationActivity] ROM swap navigate fallback to setGraph: ${it.message}") + navController.setGraph(R.navigation.emulation_navigation, startArgs) + } + } + override fun dispatchKeyEvent(event: KeyEvent): Boolean { if (event.keyCode == KeyEvent.KEYCODE_VOLUME_UP || @@ -608,19 +762,48 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager } fun onEmulationStarted() { + if (Looper.myLooper() != Looper.getMainLooper()) { + mainHandler.post { onEmulationStarted() } + return + } + hasEmulationSession = true + processHasEmulationSession = true emulationViewModel.setEmulationStarted(true) + emulationViewModel.setIsEmulationStopping(false) + emulationViewModel.setEmulationStopped(false) NativeLibrary.playTimeManagerStart() } fun onEmulationStopped(status: Int) { - if (status == 0 && emulationViewModel.programChanged.value == -1) { + if (Looper.myLooper() != Looper.getMainLooper()) { + mainHandler.post { onEmulationStopped(status) } + return + } + hasEmulationSession = false + processHasEmulationSession = false + if (isWaitingForRomSwapStop) { + romSwapNativeStopped = true + Log.info("[EmulationActivity] ROM swap 
native stop acknowledged") + launchPendingRomSwap(force = false) + } else if (status == 0 && emulationViewModel.programChanged.value == -1) { + processSessionGame = null finish() + } else if (!isWaitingForRomSwapStop) { + processSessionGame = null } emulationViewModel.setEmulationStopped(true) } + fun updateSessionGame(game: Game?) { + processSessionGame = game + } + fun onProgramChanged(programIndex: Int) { + if (Looper.myLooper() != Looper.getMainLooper()) { + mainHandler.post { onProgramChanged(programIndex) } + return + } emulationViewModel.setProgramChanged(programIndex) } @@ -644,6 +827,11 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener, InputManager companion object { const val EXTRA_SELECTED_GAME = "SelectedGame" const val EXTRA_OVERLAY_GAMELESS_EDIT_MODE = "overlayGamelessEditMode" + private const val ROM_SWAP_STOP_TIMEOUT_MS = 5000L + @Volatile + private var processHasEmulationSession = false + @Volatile + private var processSessionGame: Game? = null fun stopForegroundService(activity: Activity) { val startIntent = Intent(activity, ForegroundService::class.java) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt index 435fe5fe2c..b67bc6a9cc 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt @@ -50,6 +50,7 @@ import androidx.fragment.app.Fragment import androidx.fragment.app.activityViewModels import androidx.lifecycle.lifecycleScope import androidx.navigation.findNavController +import androidx.navigation.fragment.NavHostFragment import androidx.navigation.fragment.navArgs import androidx.window.layout.FoldingFeature import androidx.window.layout.WindowInfoTracker @@ -135,6 +136,8 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { private var intentGame: Game? 
= null private var isCustomSettingsIntent = false + private var isStoppingForRomSwap = false + private var deferGameSetupUntilStopCompletes = false private var perfStatsRunnable: Runnable? = null private var socRunnable: Runnable? = null @@ -238,6 +241,14 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } } + if (emulationViewModel.isEmulationStopping.value) { + deferGameSetupUntilStopCompletes = true + if (game == null) { + game = args.game ?: intentGame + } + return + } + finishGameSetup() } @@ -260,6 +271,7 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } game = gameToUse + emulationActivity?.updateSessionGame(gameToUse) } catch (e: Exception) { Log.error("[EmulationFragment] Error during game setup: ${e.message}") Toast.makeText( @@ -334,7 +346,8 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } emulationState = EmulationState(game!!.path) { - return@EmulationState driverViewModel.isInteractionAllowed.value + return@EmulationState driverViewModel.isInteractionAllowed.value && + !isStoppingForRomSwap } } @@ -890,8 +903,12 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } ) - GameIconUtils.loadGameIcon(game!!, binding.loadingImage) - binding.loadingTitle.text = game!!.title + game?.let { + GameIconUtils.loadGameIcon(it, binding.loadingImage) + binding.loadingTitle.text = it.title + } ?: run { + binding.loadingTitle.text = "" + } binding.loadingTitle.isSelected = true binding.loadingText.isSelected = true @@ -959,6 +976,12 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { ViewUtils.showView(binding.loadingIndicator) ViewUtils.hideView(binding.inputContainer) ViewUtils.hideView(binding.showStatsOverlayText) + } else if (deferGameSetupUntilStopCompletes) { + if (!isAdded) { + return@collect + } + deferGameSetupUntilStopCompletes = false + finishGameSetup() } } emulationViewModel.drawerOpen.collect(viewLifecycleOwner) { @@ -995,26 +1018,24 @@ class EmulationFragment : Fragment(), 
SurfaceHolder.Callback { } driverViewModel.isInteractionAllowed.collect(viewLifecycleOwner) { - if (it && !NativeLibrary.isRunning() && !NativeLibrary.isPaused()) { - startEmulation() + if (it && + !isStoppingForRomSwap && + !NativeLibrary.isRunning() && + !NativeLibrary.isPaused() + ) { + if (!DirectoryInitialization.areDirectoriesReady) { + DirectoryInitialization.start() + } + + updateScreenLayout() + + emulationState.run(emulationActivity!!.isActivityRecreated) } } driverViewModel.onLaunchGame() } - private fun startEmulation(programIndex: Int = 0) { - if (!NativeLibrary.isRunning() && !NativeLibrary.isPaused()) { - if (!DirectoryInitialization.areDirectoriesReady) { - DirectoryInitialization.start() - } - - updateScreenLayout() - - emulationState.run(emulationActivity!!.isActivityRecreated, programIndex) - } - } - override fun onConfigurationChanged(newConfig: Configuration) { super.onConfigurationChanged(newConfig) val b = _binding ?: return @@ -1375,6 +1396,9 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { super.onDestroyView() amiiboLoadJob?.cancel() amiiboLoadJob = null + perfStatsRunnable?.let { perfStatsUpdateHandler.removeCallbacks(it) } + socRunnable?.let { socUpdateHandler.removeCallbacks(it) } + handler.removeCallbacksAndMessages(null) clearPausedFrame() _binding?.surfaceInputOverlay?.touchEventListener = null _binding = null @@ -1382,7 +1406,9 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } override fun onDetach() { - NativeLibrary.clearEmulationActivity() + if (!hasNewerEmulationFragment()) { + NativeLibrary.clearEmulationActivity() + } super.onDetach() } @@ -1840,10 +1866,74 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } override fun surfaceDestroyed(holder: SurfaceHolder) { - emulationState.clearSurface() + if (this::emulationState.isInitialized && !hasNewerEmulationFragment()) { + emulationState.clearSurface() + } emulationStarted = false } + private fun hasNewerEmulationFragment(): 
Boolean { + val activity = emulationActivity ?: return false + return try { + val navHostFragment = + activity.supportFragmentManager.findFragmentById(R.id.fragment_container) as? NavHostFragment + ?: return false + val currentFragment = navHostFragment.childFragmentManager.fragments + .filterIsInstance() + .firstOrNull() + currentFragment != null && currentFragment !== this + } catch (_: Exception) { + false + } + } + + // xbzk: called from EmulationActivity when a new game is loaded while this fragment is still active, + // to wait for the emulation thread to stop before allowing the ROM swap to proceed + fun notifyWhenEmulationThreadStops(onStopped: () -> Unit) { + if (!this::emulationState.isInitialized) { + onStopped() + return + } + val emuThread = runCatching { emulationState.emulationThread }.getOrNull() + if (emuThread == null || !emuThread.isAlive) { + onStopped() + return + } + Thread({ + runCatching { emuThread.join() } + Handler(Looper.getMainLooper()).post { + onStopped() + } + }, "RomSwapWait").start() + } + + // xbzk: called from EmulationActivity when a new game is loaded while this + // fragment is still active, to stop the current emulation before swapping the ROM + fun stopForRomSwap() { + if (isStoppingForRomSwap) { + return + } + isStoppingForRomSwap = true + clearPausedFrame() + emulationViewModel.setIsEmulationStopping(true) + _binding?.let { + binding.loadingText.setText(R.string.shutting_down) + ViewUtils.showView(binding.loadingIndicator) + ViewUtils.hideView(binding.inputContainer) + ViewUtils.hideView(binding.showStatsOverlayText) + } + if (this::emulationState.isInitialized) { + emulationState.stop() + if (NativeLibrary.isRunning() || NativeLibrary.isPaused()) { + Log.warning("[EmulationFragment] ROM swap stop fallback: forcing native stop request.") + NativeLibrary.stopEmulation() + } + } else { + NativeLibrary.stopEmulation() + } + NativeConfig.reloadGlobalConfig() + } + private fun showOverlayOptions() { val anchor = 
binding.inGameMenu.findViewById(R.id.menu_overlay_controls) val popup = PopupMenu(requireContext(), anchor) @@ -2134,6 +2224,7 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { state = State.STOPPED } else { Log.warning("[EmulationFragment] Stop called while already stopped.") + NativeLibrary.stopEmulation() } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/GamePropertiesFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/GamePropertiesFragment.kt index faa35bc3eb..c3dea79bae 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/GamePropertiesFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/GamePropertiesFragment.kt @@ -36,6 +36,7 @@ import org.yuzu.yuzu_emu.databinding.FragmentGamePropertiesBinding import org.yuzu.yuzu_emu.features.DocumentProvider import org.yuzu.yuzu_emu.features.settings.model.Settings import org.yuzu.yuzu_emu.features.settings.ui.SettingsSubscreen +import org.yuzu.yuzu_emu.model.AddonViewModel import org.yuzu.yuzu_emu.model.DriverViewModel import org.yuzu.yuzu_emu.model.GameProperty import org.yuzu.yuzu_emu.model.GamesViewModel @@ -46,6 +47,7 @@ import org.yuzu.yuzu_emu.model.SubmenuProperty import org.yuzu.yuzu_emu.model.TaskState import org.yuzu.yuzu_emu.utils.DirectoryInitialization import org.yuzu.yuzu_emu.utils.FileUtil +import org.yuzu.yuzu_emu.utils.GameHelper import org.yuzu.yuzu_emu.utils.GameIconUtils import org.yuzu.yuzu_emu.utils.GpuDriverHelper import org.yuzu.yuzu_emu.utils.MemoryUtil @@ -61,6 +63,7 @@ class GamePropertiesFragment : Fragment() { private val homeViewModel: HomeViewModel by activityViewModels() private val gamesViewModel: GamesViewModel by activityViewModels() + private val addonViewModel: AddonViewModel by activityViewModels() private val driverViewModel: DriverViewModel by activityViewModels() private val args by navArgs() @@ -118,6 +121,20 @@ class GamePropertiesFragment : Fragment() { 
.show(childFragmentManager, LaunchGameDialogFragment.TAG) } + if (GameHelper.cachedGameList.isEmpty()) { + binding.buttonStart.isEnabled = false + viewLifecycleOwner.lifecycleScope.launch { + withContext(Dispatchers.IO) { + GameHelper.restoreContentForGame(args.game) + } + if (_binding == null) { + return@launch + } + addonViewModel.onAddonsViewStarted(args.game) + binding.buttonStart.isEnabled = true + } + } + reloadList() homeViewModel.openImportSaves.collect( diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/GamesViewModel.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/GamesViewModel.kt index 39ff038034..1a63a3ad82 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/GamesViewModel.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/GamesViewModel.kt @@ -100,42 +100,45 @@ class GamesViewModel : ViewModel() { viewModelScope.launch { withContext(Dispatchers.IO) { - if (firstStartup) { - // Retrieve list of cached games - val storedGames = - PreferenceManager.getDefaultSharedPreferences(YuzuApplication.appContext) - .getStringSet(GameHelper.KEY_GAMES, emptySet()) - if (storedGames!!.isNotEmpty()) { - val deserializedGames = mutableSetOf() - storedGames.forEach { - val game: Game - try { - game = Json.decodeFromString(it) - } catch (e: Exception) { - // We don't care about any errors related to parsing the game cache - return@forEach - } + try { + if (firstStartup) { + // Retrieve list of cached games + val storedGames = + PreferenceManager.getDefaultSharedPreferences(YuzuApplication.appContext) + .getStringSet(GameHelper.KEY_GAMES, emptySet()) + if (storedGames!!.isNotEmpty()) { + val deserializedGames = mutableSetOf() + storedGames.forEach { + val game: Game + try { + game = Json.decodeFromString(it) + } catch (e: Exception) { + // We don't care about any errors related to parsing the game cache + return@forEach + } - val gameExists = - DocumentFile.fromSingleUri( - YuzuApplication.appContext, - 
Uri.parse(game.path) - )?.exists() - if (gameExists == true) { - deserializedGames.add(game) + val gameExists = + DocumentFile.fromSingleUri( + YuzuApplication.appContext, + Uri.parse(game.path) + )?.exists() + if (gameExists == true) { + deserializedGames.add(game) + } } + setGames(deserializedGames.toList()) } - setGames(deserializedGames.toList()) } - } - setGames(GameHelper.getGames()) - reloading.set(false) - _isReloading.value = false - _shouldScrollAfterReload.value = true + setGames(GameHelper.getGames()) + _shouldScrollAfterReload.value = true - if (directoriesChanged) { - setShouldSwapData(true) + if (directoriesChanged) { + setShouldSwapData(true) + } + } finally { + reloading.set(false) + _isReloading.value = false } } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DirectoryInitialization.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DirectoryInitialization.kt index f47c60491b..f961c5e984 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DirectoryInitialization.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/DirectoryInitialization.kt @@ -23,8 +23,8 @@ object DirectoryInitialization { fun start() { if (!areDirectoriesReady) { initializeInternalStorage() - NativeLibrary.initializeSystem(false) NativeConfig.initializeGlobalConfig() + NativeLibrary.initializeSystem(false) NativeLibrary.reloadProfiles() migrateSettings() areDirectoriesReady = true diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt index 4a3cf61daa..64e035afbe 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GameHelper.kt @@ -8,9 +8,11 @@ package org.yuzu.yuzu_emu.utils import android.content.SharedPreferences import android.net.Uri +import android.provider.DocumentsContract import androidx.preference.PreferenceManager import 
kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json +import java.io.File import org.yuzu.yuzu_emu.NativeLibrary import org.yuzu.yuzu_emu.YuzuApplication import org.yuzu.yuzu_emu.model.Game @@ -49,29 +51,8 @@ object GameHelper { // Remove previous filesystem provider information so we can get up to date version info NativeLibrary.clearFilesystemProvider() - // Scan External Content directories and register all NSP/XCI files - val externalContentDirs = NativeConfig.getExternalContentDirs() - val uniqueExternalContentDirs = linkedSetOf() - externalContentDirs.forEach { externalDir -> - if (externalDir.isNotEmpty()) { - uniqueExternalContentDirs.add(externalDir) - } - } - val mountedContainerUris = mutableSetOf() - for (externalDir in uniqueExternalContentDirs) { - if (externalDir.isNotEmpty()) { - val externalDirUri = externalDir.toUri() - if (FileUtil.isTreeUriValid(externalDirUri)) { - scanContentContainersRecursive(FileUtil.listFiles(externalDirUri), 3) { - val containerUri = it.uri.toString() - if (mountedContainerUris.add(containerUri)) { - NativeLibrary.addFileToFilesystemProvider(containerUri) - } - } - } - } - } + mountExternalContentDirectories(mountedContainerUris) val badDirs = mutableListOf() gameDirs.forEachIndexed { index: Int, gameDir: GameDir -> @@ -115,6 +96,15 @@ object GameHelper { return games.toList() } + fun restoreContentForGame(game: Game) { + NativeLibrary.reloadKeys() + + val mountedContainerUris = mutableSetOf() + mountExternalContentDirectories(mountedContainerUris) + mountGameFolderContent(Uri.parse(game.path), mountedContainerUris) + NativeLibrary.addFileToFilesystemProvider(game.path) + } + // File extensions considered as external content, buuut should // be done better imo. 
private val externalContentExtensions = setOf("nsp", "xci") @@ -181,6 +171,71 @@ object GameHelper { } } + private fun mountExternalContentDirectories(mountedContainerUris: MutableSet) { + val uniqueExternalContentDirs = linkedSetOf() + NativeConfig.getExternalContentDirs().forEach { externalDir -> + if (externalDir.isNotEmpty()) { + uniqueExternalContentDirs.add(externalDir) + } + } + + for (externalDir in uniqueExternalContentDirs) { + val externalDirUri = externalDir.toUri() + if (FileUtil.isTreeUriValid(externalDirUri)) { + scanContentContainersRecursive(FileUtil.listFiles(externalDirUri), 3) { + val containerUri = it.uri.toString() + if (mountedContainerUris.add(containerUri)) { + NativeLibrary.addFileToFilesystemProvider(containerUri) + } + } + } + } + } + + private fun mountGameFolderContent(gameUri: Uri, mountedContainerUris: MutableSet) { + if (gameUri.scheme == "content") { + val parentUri = getParentDocumentUri(gameUri) ?: return + scanContentContainersRecursive(FileUtil.listFiles(parentUri), 1) { + val containerUri = it.uri.toString() + if (mountedContainerUris.add(containerUri)) { + NativeLibrary.addGameFolderFileToFilesystemProvider(containerUri) + } + } + return + } + + val gameFile = File(gameUri.path ?: gameUri.toString()) + val parentDir = gameFile.parentFile ?: return + parentDir.listFiles()?.forEach { sibling -> + if (!sibling.isFile) { + return@forEach + } + + val extension = sibling.extension.lowercase() + if (externalContentExtensions.contains(extension)) { + val containerUri = Uri.fromFile(sibling).toString() + if (mountedContainerUris.add(containerUri)) { + NativeLibrary.addGameFolderFileToFilesystemProvider(containerUri) + } + } + } + } + + private fun getParentDocumentUri(uri: Uri): Uri? 
{ + return try { + val documentId = DocumentsContract.getDocumentId(uri) + val separatorIndex = documentId.lastIndexOf('/') + if (separatorIndex == -1) { + null + } else { + val parentDocumentId = documentId.substring(0, separatorIndex) + DocumentsContract.buildDocumentUriUsingTree(uri, parentDocumentId) + } + } catch (_: Exception) { + null + } + } + fun getGame( uri: Uri, addedToLibrary: Boolean, diff --git a/src/android/app/src/main/res/drawable/ic_launcher_foreground.png b/src/android/app/src/main/res/drawable/ic_launcher_foreground.png index 53f1cace9b..8b970cd4cc 100644 Binary files a/src/android/app/src/main/res/drawable/ic_launcher_foreground.png and b/src/android/app/src/main/res/drawable/ic_launcher_foreground.png differ diff --git a/src/android/app/src/main/res/drawable/ic_yuzu.png b/src/android/app/src/main/res/drawable/ic_yuzu.png index fce02afa1f..7e2461ba24 100644 Binary files a/src/android/app/src/main/res/drawable/ic_yuzu.png and b/src/android/app/src/main/res/drawable/ic_yuzu.png differ diff --git a/src/android/app/src/main/res/drawable/ic_yuzu_splash.png b/src/android/app/src/main/res/drawable/ic_yuzu_splash.png index 0e43cb9374..c9404d9937 100644 Binary files a/src/android/app/src/main/res/drawable/ic_yuzu_splash.png and b/src/android/app/src/main/res/drawable/ic_yuzu_splash.png differ diff --git a/src/android/app/src/main/res/mipmap-hdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-hdpi/ic_launcher.png index 23bc2897c3..74c6677dd9 100644 Binary files a/src/android/app/src/main/res/mipmap-hdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-hdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/mipmap-mdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-mdpi/ic_launcher.png index f630e793e3..31a01461b4 100644 Binary files a/src/android/app/src/main/res/mipmap-mdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-mdpi/ic_launcher.png differ diff --git 
a/src/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png index 1daa3c624f..3f0023f573 100644 Binary files a/src/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png index 7fc64e1393..6e28b3d598 100644 Binary files a/src/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png b/src/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png index 53ed9b9914..39f583b630 100644 Binary files a/src/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png and b/src/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png differ diff --git a/src/android/app/src/main/res/values/colors.xml b/src/android/app/src/main/res/values/colors.xml index 472567b323..ad3412ed27 100644 --- a/src/android/app/src/main/res/values/colors.xml +++ b/src/android/app/src/main/res/values/colors.xml @@ -1 +1 @@ -#1F143C +#43fcfcff diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 1ee4794272..2846058df9 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -134,6 +134,8 @@ add_library( typed_address.h uint128.h unique_function.h + random.cpp + random.h uuid.cpp uuid.h vector_math.h @@ -144,7 +146,8 @@ add_library( zstd_compression.cpp zstd_compression.h fs/ryujinx_compat.h fs/ryujinx_compat.cpp - fs/symlink.h fs/symlink.cpp) + fs/symlink.h fs/symlink.cpp + httplib.h) if(WIN32) target_sources(common PRIVATE windows/timer_resolution.cpp @@ -242,7 +245,7 @@ else() target_link_libraries(common PUBLIC Boost::headers) endif() -target_link_libraries(common PUBLIC Boost::filesystem Boost::context) +target_link_libraries(common PUBLIC Boost::filesystem 
Boost::context httplib::httplib) if (lz4_ADDED) target_include_directories(common PRIVATE ${lz4_SOURCE_DIR}/lib) diff --git a/src/common/httplib.h b/src/common/httplib.h new file mode 100644 index 0000000000..57bc4eeb93 --- /dev/null +++ b/src/common/httplib.h @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#define CPPHTTPLIB_DISABLE_MACOSX_AUTOMATIC_ROOT_CERTIFICATES 1 +#define CPPHTTPLIB_OPENSSL_SUPPORT 1 + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#ifndef __clang__ +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#endif +#include +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif diff --git a/src/common/random.cpp b/src/common/random.cpp new file mode 100644 index 0000000000..d951881cd2 --- /dev/null +++ b/src/common/random.cpp @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include "common/random.h" + +namespace Common::Random { + [[nodiscard]] static std::random_device& GetGlobalRandomDevice() noexcept { + static std::random_device g_random_device{}; + return g_random_device; + } + [[nodiscard]] u32 Random32(u32 seed) noexcept { + return GetGlobalRandomDevice()(); + } + [[nodiscard]] u64 Random64(u64 seed) noexcept { + return GetGlobalRandomDevice()(); + } + [[nodiscard]] std::mt19937 GetMT19937() noexcept { + return std::mt19937(GetGlobalRandomDevice()()); + } +} diff --git a/src/common/random.h b/src/common/random.h new file mode 100644 index 0000000000..83210f6dc2 --- /dev/null +++ b/src/common/random.h @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include "common/common_types.h" + +namespace Common::Random { + [[nodiscard]] u32 Random32(u32 seed) noexcept; + [[nodiscard]] u64 Random64(u64 seed) noexcept; + [[nodiscard]] 
std::mt19937 GetMT19937() noexcept; +} diff --git a/src/common/tiny_mt.h b/src/common/tiny_mt.h index c9f9ed4a5d..4b556a33eb 100644 --- a/src/common/tiny_mt.h +++ b/src/common/tiny_mt.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -218,12 +218,6 @@ public: return t0; } - u64 GenerateRandomU64() { - const u32 lo = this->GenerateRandomU32(); - const u32 hi = this->GenerateRandomU32(); - return (u64{hi} << 32) | u64{lo}; - } - float GenerateRandomF32() { // Floats have 24 bits of mantissa. constexpr u32 MantissaBits = 24; diff --git a/src/common/uuid.cpp b/src/common/uuid.cpp index 8f0dba452c..d4a5733c26 100644 --- a/src/common/uuid.cpp +++ b/src/common/uuid.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -10,6 +13,7 @@ #include "common/assert.h" #include "common/tiny_mt.h" #include "common/uuid.h" +#include "common/random.h" namespace Common { @@ -175,21 +179,16 @@ u128 UUID::AsU128() const { } UUID UUID::MakeRandom() { - std::random_device device; - - return MakeRandomWithSeed(device()); + return MakeRandomWithSeed(Common::Random::Random32(0)); } UUID UUID::MakeRandomWithSeed(u32 seed) { // Create and initialize our RNG. TinyMT rng; rng.Initialize(seed); - UUID uuid; - // Populate the UUID with random bytes. 
rng.GenerateRandomBytes(uuid.uuid.data(), sizeof(UUID)); - return uuid; } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 08a2d0e2db..6dfc23229a 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1269,7 +1269,6 @@ endif() target_sources(core PRIVATE hle/service/ssl/ssl_backend_openssl.cpp) target_link_libraries(core PRIVATE OpenSSL::SSL OpenSSL::Crypto) -target_compile_definitions(core PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT) # TODO diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index 61671ea333..af41820a36 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -12,6 +12,7 @@ #include "common/fs/path_util.h" #include "common/hex_util.h" #include "common/logging.h" +#include "common/random.h" #include "common/string_util.h" #include "core/crypto/key_manager.h" #include "core/file_sys/card_image.h" @@ -490,17 +491,13 @@ std::vector PlaceholderCache::List() const { } NcaID PlaceholderCache::Generate() { - std::random_device device; - std::mt19937 gen(device()); + auto gen = Common::Random::GetMT19937(); std::uniform_int_distribution distribution(1, (std::numeric_limits::max)()); - NcaID out{}; - const auto v1 = distribution(gen); const auto v2 = distribution(gen); std::memcpy(out.data(), &v1, sizeof(u64)); std::memcpy(out.data() + sizeof(u64), &v2, sizeof(u64)); - return out; } diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp index 1446653916..3343d1d282 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -7,6 +7,7 @@ 
#include #include "common/literals.h" +#include "common/random.h" #include "common/settings.h" #include "core/hle/kernel/board/nintendo/nx/k_system_control.h" @@ -201,15 +202,8 @@ u64 GenerateUniformRange(u64 min, u64 max, F f) { } // Anonymous namespace -u64 KSystemControl::GenerateRandomU64() { - std::random_device device; - std::mt19937 gen(device()); - std::uniform_int_distribution distribution(1, (std::numeric_limits::max)()); - return distribution(gen); -} - u64 KSystemControl::GenerateRandomRange(u64 min, u64 max) { - return GenerateUniformRange(min, max, GenerateRandomU64); + return GenerateUniformRange(min, max, Common::Random::GetMT19937()); } size_t KSystemControl::CalculateRequiredSecureMemorySize(size_t size, u32 pool) { diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h index 60c5e58b73..41a25ba1c8 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -33,7 +36,6 @@ public: // Randomness. static u64 GenerateRandomRange(u64 min, u64 max); - static u64 GenerateRandomU64(); // Secure Memory. 
static size_t CalculateRequiredSecureMemorySize(size_t size, u32 pool); diff --git a/src/core/hle/kernel/k_page_bitmap.h b/src/core/hle/kernel/k_page_bitmap.h index fc21b81574..27bd682c5c 100644 --- a/src/core/hle/kernel/k_page_bitmap.h +++ b/src/core/hle/kernel/k_page_bitmap.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -14,6 +14,7 @@ #include "common/bit_util.h" #include "common/common_types.h" #include "common/tiny_mt.h" +#include "common/random.h" #include "core/hle/kernel/k_system_control.h" namespace Kernel { @@ -23,7 +24,7 @@ public: class RandomBitGenerator { public: RandomBitGenerator() { - m_rng.Initialize(static_cast(KSystemControl::GenerateRandomU64())); + m_rng.Initialize(u32(Common::Random::Random64(0))); } u64 SelectRandomBit(u64 bitmap) { diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 17bdb7b6fa..ea9b7eb114 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -20,6 +20,7 @@ #include "common/fiber.h" #include "common/logging.h" #include "common/settings.h" +#include "common/random.h" #include "core/core.h" #include "core/cpu_manager.h" #include "core/hardware_properties.h" @@ -45,8 +46,7 @@ namespace { constexpr inline s32 TerminatingThreadPriority = Kernel::Svc::SystemThreadPriorityHighest - 1; -static void ResetThreadContext32(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point, - u64 arg) { +static void ResetThreadContext32(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point, u64 arg) { ctx = {}; ctx.r[0] = arg; ctx.r[15] = entry_point; @@ -55,11 +55,10 @@ static void ResetThreadContext32(Kernel::Svc::ThreadContext& ctx, u64 stack_top, ctx.fpsr = 0; } -static void ResetThreadContext64(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 
entry_point, - u64 arg) { +static void ResetThreadContext64(Kernel::Svc::ThreadContext& ctx, u64 stack_top, u64 entry_point, u64 arg) { ctx = {}; ctx.r[0] = arg; - ctx.r[18] = Kernel::KSystemControl::GenerateRandomU64() | 1; + ctx.r[18] = Common::Random::Random64(0) | 1; ctx.pc = entry_point; ctx.sp = stack_top; ctx.fpcr = 0; diff --git a/src/core/hle/service/bcat/news/builtin_news.cpp b/src/core/hle/service/bcat/news/builtin_news.cpp index ed001b056b..d24431cdbc 100644 --- a/src/core/hle/service/bcat/news/builtin_news.cpp +++ b/src/core/hle/service/bcat/news/builtin_news.cpp @@ -15,9 +15,7 @@ #include #include -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -#include -#endif +#include "common/httplib.h" #include #include @@ -103,8 +101,6 @@ std::vector TryLoadFromDisk(const std::filesystem::path& path) { std::vector DownloadImage(const std::string& url_path, const std::filesystem::path& cache_path) { LOG_INFO(Service_BCAT, "Downloading image: https://eden-emu.dev{}", url_path); - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT try { httplib::Client cli("https://eden-emu.dev"); cli.set_follow_location(true); @@ -128,8 +124,6 @@ std::vector DownloadImage(const std::string& url_path, const std::filesystem } catch (...) { LOG_WARNING(Service_BCAT, "Failed to download: {}", url_path); } -#endif - return {}; } @@ -232,8 +226,6 @@ void WriteCachedJson(std::string_view json) { } std::optional DownloadReleasesJson() { - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT try { httplib::SSLClient cli{"api.github.com", 443}; cli.set_connection_timeout(10); @@ -255,7 +247,6 @@ std::optional DownloadReleasesJson() { } catch (...) 
{ LOG_WARNING(Service_BCAT, " failed to download releases"); } -#endif return std::nullopt; } diff --git a/src/core/hle/service/cmif_serialization.h b/src/core/hle/service/cmif_serialization.h index 4d32c6cd6b..75461cc6be 100644 --- a/src/core/hle/service/cmif_serialization.h +++ b/src/core/hle/service/cmif_serialization.h @@ -438,20 +438,20 @@ void WriteOutArgument(bool is_domain, CallArguments& args, u8* raw_data, HLERequ template void CmifReplyWrapImpl(HLERequestContext& ctx, T& t, Result (T::*f)(A...)) { + const auto mgr = ctx.GetManager().get(); // Verify domain state. if constexpr (!Domain) { - const auto _mgr = ctx.GetManager(); - const bool _is_domain = _mgr ? _mgr->IsDomain() : false; - ASSERT_MSG(!_is_domain, - "Non-domain reply used on domain session\n" - "Service={} (TIPC={} CmdType={} Cmd=0x{:08X}\n" - "HasDomainHeader={} DomainHandlers={}\nDesc={}", - t.GetServiceName(), ctx.IsTipc(), - static_cast(ctx.GetCommandType()), static_cast(ctx.GetCommand()), - ctx.HasDomainMessageHeader(), _mgr ? static_cast(_mgr->DomainHandlerCount()) : 0u, - ctx.Description()); + const bool is_domain = mgr ? mgr->IsDomain() : false; + ASSERT_MSG(!is_domain, + "Non-domain reply used on domain session\n" + "Service={} (TIPC={} CmdType={} Cmd=0x{:08X}\n" + "HasDomainHeader={} DomainHandlers={}\nDesc={}", + t.GetServiceName(), ctx.IsTipc(), + u32(ctx.GetCommandType()), u32(ctx.GetCommand()), + ctx.HasDomainMessageHeader(), mgr ? u32(mgr->DomainHandlerCount()) : 0u, + ctx.Description()); } - const bool is_domain = Domain ? ctx.GetManager()->IsDomain() : false; + const bool is_domain = Domain ? 
mgr->IsDomain() : false; static_assert(ConstIfReference(), "Arguments taken by reference must be const"); using MethodArguments = std::tuple...>; diff --git a/src/core/hle/service/ipc_helpers.h b/src/core/hle/service/ipc_helpers.h index 4b02872fba..8aee17db8d 100644 --- a/src/core/hle/service/ipc_helpers.h +++ b/src/core/hle/service/ipc_helpers.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2016 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -78,32 +81,29 @@ public: memset(cmdbuf, 0, sizeof(u32) * IPC::COMMAND_BUFFER_LENGTH); IPC::CommandHeader header{}; + auto const mgr = ctx.GetManager().get(); // The entire size of the raw data section in u32 units, including the 16 bytes of mandatory // padding. - u32 raw_data_size = ctx.write_size = - ctx.IsTipc() ? normal_params_size - 1 : normal_params_size; + u32 raw_data_size = ctx.write_size = ctx.IsTipc() ? normal_params_size - 1 : normal_params_size; u32 num_handles_to_move{}; u32 num_domain_objects{}; - const bool always_move_handles{ - (static_cast(flags) & static_cast(Flags::AlwaysMoveHandles)) != 0}; - if (!ctx.GetManager()->IsDomain() || always_move_handles) { + const bool always_move_handles = (u32(flags) & u32(Flags::AlwaysMoveHandles)) != 0; + if (!mgr->IsDomain() || always_move_handles) { num_handles_to_move = num_objects_to_move; } else { num_domain_objects = num_objects_to_move; } - if (ctx.GetManager()->IsDomain()) { - raw_data_size += - static_cast(sizeof(DomainMessageHeader) / sizeof(u32) + num_domain_objects); + if (mgr->IsDomain()) { + raw_data_size += u32(sizeof(DomainMessageHeader) / sizeof(u32) + num_domain_objects); ctx.write_size += num_domain_objects; } if (ctx.IsTipc()) { header.type.Assign(ctx.GetCommandType()); } else { - raw_data_size += static_cast(sizeof(IPC::DataPayloadHeader) / sizeof(u32) + 4 + - normal_params_size); + raw_data_size += 
u32(sizeof(IPC::DataPayloadHeader) / sizeof(u32) + 4 + normal_params_size); } header.data_size.Assign(raw_data_size); @@ -126,7 +126,7 @@ public: if (!ctx.IsTipc()) { AlignWithPadding(); - if (ctx.GetManager()->IsDomain() && ctx.HasDomainMessageHeader()) { + if (mgr->IsDomain() && ctx.HasDomainMessageHeader()) { IPC::DomainMessageHeader domain_header{}; domain_header.num_objects = num_domain_objects; PushRaw(domain_header); diff --git a/src/core/hle/service/mii/mii_util.h b/src/core/hle/service/mii/mii_util.h index 3534fa31d5..2ef006765c 100644 --- a/src/core/hle/service/mii/mii_util.h +++ b/src/core/hle/service/mii/mii_util.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -7,6 +10,7 @@ #include #include "common/common_types.h" +#include "common/random.h" #include "common/swap.h" #include "common/uuid.h" #include "core/hle/service/mii/mii_types.h" @@ -65,11 +69,9 @@ public: template static T GetRandomValue(T min, T max) { - std::random_device device; - std::mt19937 gen(device()); - std::uniform_int_distribution distribution(static_cast(min), - static_cast(max)); - return static_cast(distribution(gen)); + std::uniform_int_distribution distribution{u64(min), u64(max)}; + auto gen = Common::Random::GetMT19937(); + return T(distribution(gen)); } template diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 4e0b119f21..2ea63e137e 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -6,6 +6,7 @@ #include #include "common/logging.h" +#include "common/random.h" #include "common/settings.h" #include "core/core.h" #include "core/file_sys/content_archive.h" @@ -229,7 +230,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect // 
TODO: this is bad form of ASLR, it sucks size_t aslr_offset = ((::Settings::values.rng_seed_enabled.GetValue() ? ::Settings::values.rng_seed.GetValue() - : std::rand()) * 0x734287f27) & 0xfff000; + : Common::Random::Random64(0)) * 0x734287f27) & 0xfff000; // Setup the process code layout if (process.LoadFromMetadata(metadata, code_size, fastmem_base, aslr_offset, is_hbl).IsError()) { diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index db6c98c5a3..81449ac8b8 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp @@ -6,6 +6,7 @@ #include #include "common/settings.h" +#include "common/random.h" #include "core/file_sys/kernel_executable.h" #include "core/file_sys/program_metadata.h" #include "core/hle/kernel/code_set.h" @@ -90,7 +91,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::KProcess& process, // TODO: this is bad form of ASLR, it sucks size_t aslr_offset = ((::Settings::values.rng_seed_enabled.GetValue() ? ::Settings::values.rng_seed.GetValue() - : std::rand()) * 0x734287f27) & 0xfff000; + : Common::Random::Random64(0)) * 0x734287f27) & 0xfff000; // Setup the process code layout if (process.LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), codeset.memory.size(), 0, aslr_offset, false).IsError()) { diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index b429aa9e80..e7c5ac01b1 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "common/logging.h" #include "common/settings.h" +#include "common/random.h" #include "common/swap.h" #include "core/core.h" #include "core/file_sys/control_metadata.h" @@ -243,7 +244,7 @@ static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, // TODO: this is bad form of ASLR, it sucks size_t aslr_offset = ((::Settings::values.rng_seed_enabled.GetValue() ? 
::Settings::values.rng_seed.GetValue() - : std::rand()) * 0x734287f27) & 0xfff000; + : Common::Random::Random64(0)) * 0x734287f27) & 0xfff000; // Setup the process code layout if (process diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 5c57df424c..3a9ea308a8 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -45,11 +45,7 @@ static inline bool AddressSpaceContains(const Common::PageTable& table, const Co // from outside classes. This also allows modification to the internals of the memory // subsystem without needing to rebuild all files that make use of the memory interface. struct Memory::Impl { - explicit Impl(Core::System& system_) : system{system_} { - // Initialize thread count based on available cores for parallel memory operations - const unsigned int hw_concurrency = std::thread::hardware_concurrency(); - thread_count = (std::max)(2u, (std::min)(hw_concurrency, 8u)); // Limit to 8 threads max - } + explicit Impl(Core::System& system_) : system{system_} {} void SetCurrentPageTable(Kernel::KProcess& process) { current_page_table = &process.GetPageTable().GetImpl(); @@ -856,13 +852,7 @@ struct Memory::Impl { Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{}; Common::PageTable* current_page_table = nullptr; - // Number of threads to use for parallel memory operations - unsigned int thread_count = 2; - - // Minimum size in bytes for which parallel processing is beneficial - //size_t PARALLEL_THRESHOLD = (L3 CACHE * NUM PHYSICAL CORES); // 64 KB - std::array - rasterizer_read_areas{}; + std::array rasterizer_read_areas{}; std::array rasterizer_write_areas{}; std::array, Core::Hardware::NUM_CPU_CORES> scratch_buffers{}; std::span gpu_dirty_managers; diff --git a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp index 47d83f2362..a92648cd44 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -316,8 
+316,8 @@ int RegAlloc::RealizeReadImpl(const IR::Value& value) { return current_location->index; } - ASSERT(!ValueInfo(*current_location).realized); - ASSERT(ValueInfo(*current_location).locked); + ASSERT(!bool(ValueInfo(*current_location).realized)); + ASSERT(bool(ValueInfo(*current_location).locked)); if constexpr (required_kind == HostLoc::Kind::Gpr) { const int new_location_index = AllocateRegister(gprs, gpr_order); diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index 80f0f9cc2f..dd9e9e4a66 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -59,8 +59,10 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) { UNREACHABLE(); } -A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block) - : EmitContext(reg_alloc, block), conf(conf) {} +A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels) + : EmitContext(reg_alloc, block, shared_labels) + , conf(conf) +{} A32::LocationDescriptor A32EmitContext::Location() const { return A32::LocationDescriptor{block.Location()}; @@ -109,35 +111,59 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { gprs.reset(size_t(HostLoc::R14)); return gprs; }(), any_xmm); - A32EmitContext ctx{conf, reg_alloc, block}; + + A32EmitContext ctx{conf, reg_alloc, block, shared_labels}; // Start emitting. code.align(); const u8* const entrypoint = code.getCurr(); + code.mov(code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)], rbp); + code.lea(rbp, code.ptr[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer) - 8]); EmitCondPrelude(ctx); - - for (auto iter = block.instructions.begin(); iter != block.instructions.end(); ++iter) [[likely]] { - auto* inst = &*iter; - // Call the relevant Emit* member function. 
- switch (inst->GetOpcode()) { -#define OPCODE(name, type, ...) \ - case IR::Opcode::name: \ - A32EmitX64::Emit##name(ctx, inst); \ - break; -#define A32OPC(name, type, ...) \ - case IR::Opcode::A32##name: \ - A32EmitX64::EmitA32##name(ctx, inst);\ - break; + typedef void (EmitX64::*EmitHandlerFn)(EmitContext& context, IR::Inst* inst); + constexpr EmitHandlerFn opcode_handlers[] = { +#define OPCODE(name, type, ...) &EmitX64::Emit##name, +#define A32OPC(name, type, ...) +#define A64OPC(name, type, ...) +#include "dynarmic/ir/opcodes.inc" +#undef OPCODE +#undef A32OPC +#undef A64OPC + }; + typedef void (A32EmitX64::*A32EmitHandlerFn)(A32EmitContext& context, IR::Inst* inst); + constexpr A32EmitHandlerFn a32_handlers[] = { +#define OPCODE(...) +#define A32OPC(name, type, ...) &A32EmitX64::EmitA32##name, #define A64OPC(...) #include "dynarmic/ir/opcodes.inc" #undef OPCODE #undef A32OPC +#undef A64OPC + }; + + for (auto& inst : block.instructions) { + auto const opcode = inst.GetOpcode(); + // Call the relevant Emit* member function. + switch (opcode) { +#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch; +#define A32OPC(name, type, ...) case IR::Opcode::A32##name: goto a32_branch; +#define A64OPC(name, type, ...) 
+#include "dynarmic/ir/opcodes.inc" +#undef OPCODE +#undef A32OPC #undef A64OPC default: UNREACHABLE(); } - reg_alloc.EndOfAllocScope(); +opcode_branch: + (this->*opcode_handlers[size_t(opcode)])(ctx, &inst); + goto finish_this_inst; +a32_branch: + // Update with FIRST A32 instruction + (this->*a32_handlers[size_t(opcode) - size_t(IR::Opcode::A32SetCheckBit)])(ctx, &inst); +finish_this_inst: + ctx.reg_alloc.EndOfAllocScope(); #ifndef NDEBUG if (conf.very_verbose_debugging_output) EmitVerboseDebuggingOutput(reg_alloc); @@ -146,15 +172,14 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { reg_alloc.AssertNoMoreUses(); - if (conf.enable_cycle_counting) { + if (conf.enable_cycle_counting) EmitAddCycles(block.CycleCount()); - } + code.mov(rbp, code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)]); EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); code.int3(); - for (auto& deferred_emit : ctx.deferred_emits) { + for (auto& deferred_emit : ctx.deferred_emits) deferred_emit(); - } code.int3(); const size_t size = size_t(code.getCurr() - entrypoint); @@ -167,6 +192,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { auto const bdesc = RegisterBlock(descriptor, entrypoint, size); code.DisableWriting(); + shared_labels.clear(); return bdesc; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h index 5ec78ff50e..8e97dc7737 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. 
@@ -29,7 +29,7 @@ namespace Dynarmic::Backend::X64 { class RegAlloc; struct A32EmitContext final : public EmitContext { - A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block); + A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels); A32::LocationDescriptor Location() const; A32::LocationDescriptor EndLocation() const; @@ -130,6 +130,7 @@ public: ankerl::unordered_dense::map, void (*)()> write_fallbacks; ankerl::unordered_dense::map, void (*)()> exclusive_write_fallbacks; ankerl::unordered_dense::set do_not_fastmem; + boost::container::stable_vector shared_labels; void (*memory_read_128)() = nullptr; // Dummy void (*memory_write_128)() = nullptr; // Dummy const void* terminal_handler_pop_rsb_hint; diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 832cfdcce2..8edeb29aed 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -37,8 +37,10 @@ namespace Dynarmic::Backend::X64 { using namespace Xbyak::util; -A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block) - : EmitContext(reg_alloc, block), conf(conf) {} +A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels) + : EmitContext(reg_alloc, block, shared_labels) + , conf(conf) +{} A64::LocationDescriptor A64EmitContext::Location() const { return A64::LocationDescriptor{block.Location()}; @@ -83,11 +85,14 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { gprs.reset(size_t(HostLoc::R14)); return gprs; }(), any_xmm}; - A64EmitContext ctx{conf, reg_alloc, block}; + + A64EmitContext ctx{conf, reg_alloc, block, shared_labels}; // Start emitting. 
code.align(); const auto* const entrypoint = code.getCurr(); + code.mov(code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)], rbp); + code.lea(rbp, code.ptr[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer) - 8]); DEBUG_ASSERT(block.GetCondition() == IR::Cond::AL); typedef void (EmitX64::*EmitHandlerFn)(EmitContext& context, IR::Inst* inst); @@ -139,16 +144,13 @@ finish_this_inst: } reg_alloc.AssertNoMoreUses(); - - if (conf.enable_cycle_counting) { + if (conf.enable_cycle_counting) EmitAddCycles(block.CycleCount()); - } + code.mov(rbp, code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)]); EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep()); code.int3(); - - for (auto& deferred_emit : ctx.deferred_emits) { + for (auto& deferred_emit : ctx.deferred_emits) deferred_emit(); - } code.int3(); const size_t size = size_t(code.getCurr() - entrypoint); @@ -161,6 +163,7 @@ finish_this_inst: auto bdesc = RegisterBlock(descriptor, entrypoint, size); code.DisableWriting(); + shared_labels.clear(); return bdesc; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h index dd556e36ce..d57b1d81b9 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. 
@@ -27,7 +27,7 @@ namespace Dynarmic::Backend::X64 { struct A64EmitContext final : public EmitContext { - A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block); + A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels); A64::LocationDescriptor Location() const; bool IsSingleStep() const; @@ -126,6 +126,7 @@ public: ankerl::unordered_dense::map, void (*)()> write_fallbacks; ankerl::unordered_dense::map, void (*)()> exclusive_write_fallbacks; ankerl::unordered_dense::set do_not_fastmem; + boost::container::stable_vector shared_labels; const void* terminal_handler_pop_rsb_hint = nullptr; const void* terminal_handler_fast_dispatch_hint = nullptr; FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr; diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 4e515fef2f..4ed198e09f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -32,8 +32,11 @@ namespace Dynarmic::Backend::X64 { using namespace Xbyak::util; -EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block) - : reg_alloc(reg_alloc), block(block) {} +EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels) + : reg_alloc(reg_alloc) + , block(block) + , shared_labels(shared_labels) +{} EmitContext::~EmitContext() = default; diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.h index 301f4ffc89..619945e19a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.h @@ -16,11 +16,12 @@ #include #include -#include "dynarmic/mcl/bit.hpp" #include -#include "dynarmic/backend/x64/xbyak.h" +#include #include +#include "dynarmic/backend/x64/xbyak.h" +#include "dynarmic/mcl/bit.hpp" #include 
"dynarmic/backend/exception_handler.h" #include "dynarmic/backend/x64/reg_alloc.h" #include "dynarmic/common/fp/fpcr.h" @@ -52,24 +53,23 @@ using VectorArray = std::array> template using HalfVectorArray = std::array / 2>; +using SharedLabel = Xbyak::Label*; struct EmitContext { - EmitContext(RegAlloc& reg_alloc, IR::Block& block); + EmitContext(RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector& shared_labels); virtual ~EmitContext(); virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0; virtual bool HasOptimization(OptimizationFlag flag) const = 0; - RegAlloc& reg_alloc; - IR::Block& block; + [[nodiscard]] inline Xbyak::Label* GenSharedLabel() noexcept { + return &shared_labels.emplace_back(); + } std::vector> deferred_emits; + RegAlloc& reg_alloc; + IR::Block& block; + boost::container::stable_vector& shared_labels; }; -using SharedLabel = std::shared_ptr; - -inline SharedLabel GenSharedLabel() { - return std::make_shared(); -} - class EmitX64 { public: struct BlockDescriptor { diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index d073991fbe..6a3ab005f3 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -136,7 +136,7 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) { template SharedLabel ProcessNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a) { - SharedLabel nan = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel nan = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); FCODE(ucomis)(a, a); code.jp(*nan, code.T_NEAR); @@ -251,7 +251,7 @@ template void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - SharedLabel end = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, 
args[0]); @@ -304,7 +304,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); - SharedLabel end = GenSharedLabel(), nan = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel(); code.movaps(result, op1); if constexpr (std::is_member_function_pointer_v) { @@ -413,7 +413,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bo DenormalsAreZero(code, ctx, {result, operand}); - SharedLabel equal = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel equal = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); FCODE(ucomis)(result, operand); code.jz(*equal, code.T_NEAR); @@ -484,7 +484,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR:: } }; - SharedLabel end = GenSharedLabel(), z = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), z = ctx.GenSharedLabel(); FCODE(ucomis)(op1, op2); code.jz(*z, code.T_NEAR); @@ -632,7 +632,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bo } if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) { - SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel fallback = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); @@ -843,7 +843,7 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg64 tmp = do_default_nan ? 
INVALID_REG : ctx.reg_alloc.ScratchGpr(code); - SharedLabel end = GenSharedLabel(), nan = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel(); if (code.HasHostFeature(HostFeature::AVX)) { FCODE(vmuls)(result, op1, op2); @@ -981,7 +981,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* } if (code.HasHostFeature(HostFeature::FMA)) { - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); @@ -1129,7 +1129,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code); [[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); - SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel bad_values = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); code.movaps(value, operand); @@ -1296,7 +1296,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* } if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) { - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); @@ -1641,7 +1641,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(code); if (!unsigned_) { - SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel saturate_max = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); ZeroIfNaN<64>(code, src, scratch); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc 
b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 54fc595214..4fa14d504b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -86,7 +86,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)]; - SharedLabel abort = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); if (fastmem_marker) { // Use fastmem @@ -108,7 +108,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { conf.recompile_on_fastmem_failure, }); - EmitCheckMemoryAbort(ctx, inst, end.get()); + EmitCheckMemoryAbort(ctx, inst, end); code.jmp(*end, code.T_NEAR); }); } else { @@ -120,7 +120,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { ctx.deferred_emits.emplace_back([=, this, &ctx] { code.L(*abort); code.call(wrapped_fn); - EmitCheckMemoryAbort(ctx, inst, end.get()); + EmitCheckMemoryAbort(ctx, inst, end); code.jmp(*end, code.T_NEAR); }); } @@ -173,7 +173,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)]; - SharedLabel abort = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); if (fastmem_marker) { // Use fastmem @@ -195,7 +195,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { conf.recompile_on_fastmem_failure, }); - EmitCheckMemoryAbort(ctx, inst, end.get()); + EmitCheckMemoryAbort(ctx, inst, end); code.jmp(*end, code.T_NEAR); }); } else { @@ -207,7 +207,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { ctx.deferred_emits.emplace_back([=, this, &ctx] { code.L(*abort); code.call(wrapped_fn); - EmitCheckMemoryAbort(ctx, 
inst, end.get()); + EmitCheckMemoryAbort(ctx, inst, end); code.jmp(*end, code.T_NEAR); }); } @@ -352,7 +352,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in const auto fastmem_marker = ShouldFastmem(ctx, inst); if (fastmem_marker) { - SharedLabel abort = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); bool require_abort_handling = false; const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling); @@ -427,7 +427,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i EmitExclusiveLock(code, conf, tmp, tmp2.cvt32()); - SharedLabel end = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(); code.mov(status, u32(1)); code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); @@ -460,7 +460,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const auto fastmem_marker = ShouldFastmem(ctx, inst); if (fastmem_marker) { - SharedLabel abort = GenSharedLabel(); + SharedLabel abort = ctx.GenSharedLabel(); bool require_abort_handling = false; const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling, tmp); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index b354efcb51..3ac078f1d7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -54,7 +54,7 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { const u32 page_align_mask = static_cast(page_table_const_size - 1) & ~align_mask; - SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel(); + SharedLabel detect_boundary = ctx.GenSharedLabel(), resume = ctx.GenSharedLabel(); 
code.jnz(*detect_boundary, code.T_NEAR); code.L(*resume); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index a0fd944041..6f53580997 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -38,33 +38,21 @@ template static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); (code.*fn)(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(code, inst, xmm_a); } -template -static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - - (code.*fn)(xmm_a, xmm_a, xmm_b); - - ctx.reg_alloc.DefineValue(code, inst, xmm_a); -} - template static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -86,8 +74,8 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -111,9 +99,9 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 3 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -139,9 +127,9 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg1 = ctx.reg_alloc.UseXmm(code, args[0]); const u8 arg2 = args[1].GetImmediateU8(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -166,9 +154,9 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 3 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const arg1 = 
ctx.reg_alloc.UseXmm(code, args[0]); + auto const arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); @@ -194,7 +182,7 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::SSE41)) { @@ -218,7 +206,7 @@ void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pextrw(dest, source, index); ctx.reg_alloc.DefineValue(code, inst, dest); @@ -234,10 +222,10 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.pextrd(dest, source, index); } else { - const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshufd(source, source, index); code.movd(dest, source); } @@ -253,7 +241,7 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { if (index == 0) { // TODO: DefineValue directly on Argument for index == 0 const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr(code).cvt64(); - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.movq(dest, source); 
ctx.reg_alloc.DefineValue(code, inst, dest); return; @@ -262,10 +250,10 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr(code).cvt64(); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseXmm(code, args[0]); code.pextrq(dest, source, 1); } else { - const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.punpckhqdq(source, source); code.movq(dest, source); } @@ -277,7 +265,7 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg8 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt8(); @@ -310,7 +298,7 @@ void EmitX64::EmitVectorSetElement16(EmitContext& ctx, IR::Inst* inst) { ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg16 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt16(); code.pinsrw(source_vector, source_elem.cvt32(), index); @@ -322,7 +310,7 @@ void EmitX64::EmitVectorSetElement32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg32 
source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt32(); @@ -345,7 +333,7 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(code, args[2]); @@ -355,7 +343,7 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, source_vector); } else { const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(code, args[2]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(tmp, source_elem); @@ -369,72 +357,53 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { } } -static void VectorAbs8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsb(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pxor(temp, temp); - code.psubb(temp, data); - code.pminub(data, temp); - } -} - -static void VectorAbs16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsw(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pxor(temp, temp); - code.psubw(temp, data); - code.pmaxsw(data, temp); - } -} - -static void VectorAbs32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::SSSE3)) { - code.pabsd(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.movdqa(temp, data); - code.psrad(temp, 31); - code.pxor(data, temp); - code.psubd(data, temp); - } -} - -static void 
VectorAbs64(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& data) { - if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - code.vpabsq(data, data); - } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - code.pshufd(temp, data, 0b11110101); - code.psrad(temp, 31); - code.pxor(data, temp); - code.psubq(data, temp); - } -} - static void EmitVectorAbs(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); switch (esize) { case 8: - VectorAbs8(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsb(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubb(temp, data); + code.pminub(data, temp); + } break; case 16: - VectorAbs16(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsw(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubw(temp, data); + code.pmaxsw(data, temp); + } break; case 32: - VectorAbs32(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsd(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(temp, data); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubd(data, temp); + } break; case 64: - VectorAbs64(code, ctx, data); + if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { + code.vpabsq(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(temp, data, 0b11110101); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubq(data, temp); + } break; } - ctx.reg_alloc.DefineValue(code, inst, data); } @@ -477,15 +446,15 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorAndNot(EmitContext& ctx, IR::Inst* inst) { auto 
args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.pandn(xmm_b, xmm_a); ctx.reg_alloc.DefineValue(code, inst, xmm_b); } -static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) { +static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, auto const& result, u8 shift_amount) { if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = shift_amount < 8 ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8)) @@ -494,7 +463,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const return; } - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpckhbw(tmp, result); code.punpcklbw(result, result); @@ -506,7 +475,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); ArithmeticShiftRightByte(ctx, code, result, shift_amount); @@ -517,7 +486,7 @@ void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psraw(result, 
shift_amount); @@ -528,7 +497,7 @@ void EmitX64::EmitVectorArithmeticShiftRight16(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrad(result, shift_amount); @@ -538,14 +507,14 @@ void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = (std::min)(args[1].GetImmediateU8(), u8(63)); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { code.vpsraq(result, result, shift_amount); } else { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); const u64 sign_bit = 0x80000000'00000000u >> shift_amount; @@ -660,12 +629,12 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); code.vmovq(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); code.movq(a, a); @@ -678,7 +647,7 @@ void 
EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(a, a, 0); @@ -687,7 +656,7 @@ void EmitX64::EmitVectorBroadcastLower16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(a, a, 0b01000100); @@ -696,11 +665,11 @@ void EmitX64::EmitVectorBroadcastLower32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); } else { @@ -713,7 +682,7 @@ void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastw(a, a); } else { @@ -725,7 +694,7 @@ void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - 
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastd(a, a); } else { @@ -736,7 +705,7 @@ void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastq(a, a); } else { @@ -747,7 +716,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 16); @@ -758,7 +727,7 @@ void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) code.vpbroadcastb(a, a); code.vmovq(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); code.movq(a, a); @@ -771,7 +740,7 @@ void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorBroadcastElementLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 8); @@ -784,7 +753,7 @@ void EmitX64::EmitVectorBroadcastElementLower16(EmitContext& ctx, IR::Inst* inst void 
EmitX64::EmitVectorBroadcastElementLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 4); @@ -800,7 +769,7 @@ void EmitX64::EmitVectorBroadcastElementLower32(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 16); @@ -810,7 +779,7 @@ void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); @@ -824,7 +793,7 @@ void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 8); @@ -844,7 +813,7 @@ void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); 
ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 4); @@ -856,7 +825,7 @@ void EmitX64::EmitVectorBroadcastElement32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastElement64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 2); @@ -1043,9 +1012,9 @@ void EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); @@ -1057,11 +1026,11 @@ void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); + auto const zero = ctx.reg_alloc.ScratchXmm(code); code.pxor(zero, zero); code.pblendw(lhs, zero, 0b10101010); @@ -1082,8 +1051,8 @@ void 
EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufps(lhs, rhs, 0b10001000); @@ -1092,8 +1061,8 @@ void EmitX64::EmitVectorDeinterleaveEven32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufpd(lhs, rhs, 0b00); @@ -1102,16 +1071,16 @@ void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklbw(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0D'09'05'01'0C'08'04'00, 0x8080808080808080)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); @@ -1126,15 
+1095,15 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklwd(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0B0A'0302'0908'0100, 0x8080'8080'8080'8080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.pslld(lhs, 16); code.psrad(lhs, 16); @@ -1152,8 +1121,8 @@ void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { // copy bytes 0:3 of rhs to lhs, zero out upper 8 bytes @@ -1168,8 +1137,8 @@ void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorDeinterleaveOdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psraw(lhs, 8); code.psraw(rhs, 8); @@ -1180,8 +1149,8 @@ void 
EmitX64::EmitVectorDeinterleaveOdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrad(lhs, 16); code.psrad(rhs, 16); @@ -1192,8 +1161,8 @@ void EmitX64::EmitVectorDeinterleaveOdd16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufps(lhs, rhs, 0b11011101); @@ -1202,8 +1171,8 @@ void EmitX64::EmitVectorDeinterleaveOdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufpd(lhs, rhs, 0b11); @@ -1212,15 +1181,15 @@ void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + 
auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklbw(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0F'0B'07'03'0E'0A'06'02, 0x8080808080808080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psraw(lhs, 8); code.psraw(rhs, 8); @@ -1234,15 +1203,15 @@ void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklwd(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0F0E'0706'0D0C'0504, 0x8080'8080'8080'8080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrad(lhs, 16); code.psrad(rhs, 16); @@ -1258,17 +1227,17 @@ void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst) auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lhs = ctx.reg_alloc.UseXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); // copy bytes 4:7 of lhs to bytes 0:3 of rhs, zero out upper 8 bytes code.insertps(rhs, lhs, 0b01001100); ctx.reg_alloc.DefineValue(code, inst, rhs); } else { - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); + auto const lhs = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const rhs = ctx.reg_alloc.UseXmm(code, args[1]); + auto const zero = ctx.reg_alloc.ScratchXmm(code); code.xorps(zero, zero); code.unpcklps(lhs, rhs); @@ -1302,9 +1271,9 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b10110001); @@ -1317,9 +1286,9 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqq(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b01001110); @@ -1327,9 +1296,9 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b10110001); @@ -1353,16 +1322,16 @@ void 
EmitX64::EmitVectorExtract(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.palignr(xmm_b, xmm_a, position / 8); ctx.reg_alloc.DefineValue(code, inst, xmm_b); return; } - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrldq(xmm_a, position / 8); code.pslldq(xmm_b, (128 - position) / 8); @@ -1374,13 +1343,13 @@ void EmitX64::EmitVectorExtract(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorExtractLower(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 position = args[2].GetImmediateU8(); ASSERT(position % 8 == 0); if (position != 0) { - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklqdq(xmm_a, xmm_b); code.psrldq(xmm_a, position / 8); @@ -1405,22 +1374,33 @@ void EmitX64::EmitVectorGreaterS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorGreaterS64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE42)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpgtq); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 
= ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x80000000, 0x80000000)); + code.pxor(tmp0, tmp2); + code.pxor(tmp1, tmp2); + code.movdqa(tmp2, tmp0); + code.pcmpeqd(tmp0, tmp1); + code.pcmpgtd(tmp2, tmp1); + code.pshufd(tmp1, tmp0, 245); + code.pshufd(tmp3, tmp2, 160); + code.pshufd(tmp0, tmp2, 245); + code.pand(tmp1, tmp3); + code.por(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - for (size_t i = 0; i < result.size(); ++i) { - result[i] = (a[i] > b[i]) ? ~u64(0) : 0; - } - }); } static void EmitVectorHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, b); code.pand(tmp, a); @@ -1459,9 +1439,9 @@ void EmitX64::EmitVectorHalvingAddS32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, b); @@ -1504,12 +1484,12 @@ void EmitX64::EmitVectorHalvingAddU32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingSubSigned(size_t esize, 
EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x8080808080808080, 0x8080808080808080)); code.pxor(a, tmp); code.pxor(b, tmp); @@ -1518,7 +1498,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* break; } case 16: { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x8000800080008000, 0x8000800080008000)); code.pxor(a, tmp); code.pxor(b, tmp); @@ -1552,8 +1532,8 @@ void EmitX64::EmitVectorHalvingSubS32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingSubUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: @@ -1590,8 +1570,8 @@ void EmitX64::EmitVectorHalvingSubU32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); switch (size) { 
case 8: @@ -1630,8 +1610,8 @@ void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorInterleaveUpper(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); switch (size) { case 8: @@ -1670,7 +1650,7 @@ void EmitX64::EmitVectorInterleaveUpper64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); if (shift_amount == 0) { @@ -1696,7 +1676,7 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psllw(result, shift_amount); @@ -1707,7 +1687,7 @@ void EmitX64::EmitVectorLogicalShiftLeft16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.pslld(result, shift_amount); @@ -1718,7 +1698,7 @@ void EmitX64::EmitVectorLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { void 
EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psllq(result, shift_amount); @@ -1729,7 +1709,7 @@ void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); if (shift_amount == 0) { @@ -1753,7 +1733,7 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrlw(result, shift_amount); @@ -1764,7 +1744,7 @@ void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrld(result, shift_amount); @@ -1775,7 +1755,7 @@ void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + 
auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrlq(result, shift_amount); @@ -1783,41 +1763,12 @@ void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, result); } -template -static void EmitVectorLogicalVShiftAVX2(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - static_assert(esize == 32 || esize == 64); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - - // store sign bit of lowest byte of each element of b to select left/right shift later - ICODE(vpsll)(xmm0, b, u8(esize - 8)); - - // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) - code.vpabsb(b, b); - code.vpand(b, b, code.BConst(xword, 0xFF)); - - // calculate shifts - ICODE(vpsllv)(result, a, b); - ICODE(vpsrlv)(a, a, b); - - // implicit argument: xmm0 (sign of lowest byte of b) - if (esize == 32) { - code.blendvps(result, a); - } else { - code.blendvpd(result, a); - } - ctx.reg_alloc.DefineValue(code, inst, result); -} - void EmitX64::EmitVectorLogicalVShift8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::GFNI)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Opmask negative_mask = k1; code.pxor(tmp, tmp); @@ -1862,10 +1813,10 @@ void 
EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const right_shift = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -1886,18 +1837,87 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX2)) { - EmitVectorLogicalVShiftAVX2<32>(code, ctx, inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); + // store sign bit of lowest byte of each element of b to select left/right shift later + code.vpslld(mask, b, u8(32 - 8)); + // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) + code.vpabsb(b, b); + code.vpand(b, b, code.BConst<32>(xword, 0xFF)); + // calculate shifts + code.vpsllvd(result, a, b); + code.vpsrlvd(a, a, b); + code.vblendvps(result, result, a, mask); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - 
std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp6 = ctx.reg_alloc.ScratchXmm(code); + code.pxor(tmp3, tmp3); + code.movdqa(tmp2, tmp0); + code.psubb(tmp3, tmp1); + code.movdqa(tmp4, tmp2); + code.movdqa(tmp6, tmp2); + code.pminub(tmp3, tmp1); + code.pslld(tmp1, 24); + code.pand(tmp3, code.Const(xword, 0x000000ff'000000ff, 0x000000ff'000000ff)); + code.psrad(tmp1, 31); + code.pshuflw(tmp0, tmp3, 254); + code.pshuflw(tmp5, tmp3, 84); + code.psrld(tmp4, tmp0); + code.movdqa(tmp0, tmp2); + code.psrld(tmp0, tmp5); + code.punpcklqdq(tmp0, tmp4); + code.pshufd(tmp4, tmp3, 238); + code.pslld(tmp3, 23); + code.paddd(tmp3, code.Const(xword, 0x3F80'00003F80'0000, 0x3F80'00003F80'0000)); + code.pshuflw(tmp5, tmp4, 254); + code.pshuflw(tmp4, tmp4, 84); + code.psrld(tmp6, tmp5); + code.movdqa(tmp5, tmp2); + code.psrld(tmp5, tmp4); + code.pshufd(tmp4, tmp2, 245); + code.punpckhqdq(tmp5, tmp6); + code.cvttps2dq(tmp3, tmp3); + code.shufps(tmp0, tmp5, 204); + code.pmuludq(tmp2, tmp3); + code.pshufd(tmp3, tmp3, 245); + code.andps(tmp0, tmp1); + code.pmuludq(tmp3, tmp4); + code.pshufd(tmp2, tmp2, 232); + code.pshufd(tmp3, tmp3, 232); + code.punpckldq(tmp2, tmp3); + code.pandn(tmp1, tmp2); + code.orps(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX2)) { - EmitVectorLogicalVShiftAVX2<64>(code, ctx, inst); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto 
const result = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); + // store sign bit of lowest byte of each element of b to select left/right shift later + code.vpsllq(mask, b, u8(64 - 8)); + // sse/avx shifts are only positive, with dedicated left/right forms - shift by lowest byte of abs(b) + code.vpabsb(b, b); + code.vpand(b, b, code.BConst<64>(xword, 0xFF)); + // calculate shifts + code.vpsllvq(result, a, b); + code.vpsrlvq(a, a, b); + code.vblendvpd(result, result, a, mask); + ctx.reg_alloc.DefineValue(code, inst, result); } else { EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); @@ -1912,28 +1932,11 @@ enum class MinMaxOperation { Max, }; -// Compute the minimum/maximum of two vectors of signed 8-bit integers, using only SSE2 instructons. -// The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxS8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - if(op == MinMaxOperation::Min) { - code.movdqa(c, b); - code.pcmpgtb(c, a); - } else { - code.movdqa(c, a); - code.pcmpgtb(c, b); - } - - code.pand(a, c); - code.pandn(c, b); - code.por(a, c); -} - // Compute the minimum/maximum of two vectors of unsigned 16-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. 
-void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { +void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { if(op == MinMaxOperation::Min) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psubusw(c, b); code.psubw(a, c); @@ -1945,8 +1948,8 @@ void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of signed 32-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); +void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { + auto const c = ctx.reg_alloc.ScratchXmm(code); if(op == MinMaxOperation::Min) { code.movdqa(c, b); code.pcmpgtd(c, a); @@ -1962,12 +1965,12 @@ void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of unsigned 32-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. 
-void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); +void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, auto const& a, auto const& b, MinMaxOperation op) { + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, code.BConst<32>(xword, 0x80000000)); // bias a and b by XORing their sign bits, then use the signed comparison function - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); if(op == MinMaxOperation::Min) { code.movdqa(d, a); code.pxor(d, c); @@ -1989,11 +1992,16 @@ void EmitX64::EmitVectorMaxS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS8(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, a); + code.pcmpgtb(c, b); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2005,31 +2013,55 @@ void EmitX64::EmitVectorMaxS32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS32(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, 
inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); + code.pandn(tmp2, tmp1); + code.por(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmaxsq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmaxsq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); code.vpcmpgtq(xmm0, y, x); code.pblendvb(x, y); - ctx.reg_alloc.DefineValue(code, inst, x); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'0000, 0x8000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); 
+ code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp2, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp3, tmp4, 245); + code.pand(tmp2, tmp5); + code.por(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } } @@ -2041,11 +2073,11 @@ void EmitX64::EmitVectorMaxU16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU16(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2053,35 +2085,54 @@ void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxud); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU32(code, ctx, a, b, MinMaxOperation::Max); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - 
EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmaxuq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmaxuq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, code.Const(xword, 0x8000000000000000, 0x8000000000000000)); code.vpsubq(tmp, y, xmm0); code.vpsubq(xmm0, x, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0); code.pblendvb(x, y); - ctx.reg_alloc.DefineValue(code, inst, x); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp2, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp3, tmp4, 245); + code.pand(tmp2, tmp5); + code.por(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp0, tmp3); 
+ ctx.reg_alloc.DefineValue(code, inst, tmp0); } } @@ -2089,11 +2140,16 @@ void EmitX64::EmitVectorMinS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - FallbackMinMaxS8(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2105,31 +2161,51 @@ void EmitX64::EmitVectorMinS32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsd); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxS32(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpminsq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + 
code.vpminsq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.vpcmpgtq(xmm0, y, x); code.pblendvb(y, x); - ctx.reg_alloc.DefineValue(code, inst, y); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'0000, 0x8000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp3, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp2, tmp4, 245); + code.pand(tmp3, tmp5); + code.por(tmp2, tmp3); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + //code.movdqa(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); } } @@ -2141,11 +2217,11 @@ void EmitX64::EmitVectorMinU16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminuw); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = 
ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU16(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } @@ -2153,57 +2229,93 @@ void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminud); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); FallbackMinMaxU32(code, ctx, a, b, MinMaxOperation::Min); - ctx.reg_alloc.DefineValue(code, inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpminuq); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpminuq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else if (code.HasHostFeature(HostFeature::AVX)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, 
code.Const(xword, 0x8000000000000000, 0x8000000000000000)); code.vpsubq(tmp, y, xmm0); code.vpsubq(xmm0, x, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0); code.pblendvb(y, x); - ctx.reg_alloc.DefineValue(code, inst, y); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); }); - }); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp5 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.movdqa(tmp4, tmp2); + code.pcmpeqd(tmp2, tmp3); + code.pcmpgtd(tmp4, tmp3); + code.pshufd(tmp3, tmp2, 245); + code.pshufd(tmp5, tmp4, 160); + code.pshufd(tmp2, tmp4, 245); + code.pand(tmp3, tmp5); + code.por(tmp2, tmp3); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + //code.movdqa(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); } } void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(code); - - // TODO: Optimize - code.movdqa(tmp_a, a); - code.movdqa(tmp_b, b); - code.pmullw(a, b); - code.psrlw(tmp_a, 8); - code.psrlw(tmp_b, 8); - code.pmullw(tmp_a, tmp_b); - code.pand(a, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); - code.psllw(tmp_a, 8); - code.por(a, tmp_a); - - 
ctx.reg_alloc.DefineValue(code, inst, a); + if (code.HasHostFeature(HostFeature::AVX)) { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.vbroadcastss(tmp3, code.Const(dword, 0x00ff'00ff)); + code.vpmullw(tmp2, tmp1, tmp0); + code.vpandn(tmp0, tmp3, tmp0); + code.vpand(tmp2, tmp2, tmp3); + code.vpmaddubsw(tmp0, tmp1, tmp0); + code.vpsllw(tmp0, tmp0, 8); + code.vpor(tmp0, tmp2, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, tmp0); + code.movdqa(tmp3, tmp1); + code.movdqa(tmp4, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.punpckhbw(tmp2, tmp2); + code.punpckhbw(tmp3, tmp3); + code.punpcklbw(tmp0, tmp0); + code.punpcklbw(tmp1, tmp1); + code.pmullw(tmp3, tmp2); + code.pmullw(tmp0, tmp1); + code.pand(tmp3, tmp4); + code.pand(tmp0, tmp4); + code.packuswb(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } } void EmitX64::EmitVectorMultiply16(EmitContext& ctx, IR::Inst* inst) { @@ -2214,31 +2326,32 @@ void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmulld); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - code.movdqa(tmp, a); - code.psrlq(a, 32); - code.pmuludq(tmp, b); - code.psrlq(b, 32); - 
code.pmuludq(a, b); - code.pshufd(tmp, tmp, 0b00001000); - code.pshufd(b, a, 0b00001000); - code.punpckldq(tmp, b); - - ctx.reg_alloc.DefineValue(code, inst, tmp); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp, a); + code.psrlq(a, 32); + code.pmuludq(tmp, b); + code.psrlq(b, 32); + code.pmuludq(a, b); + code.pshufd(tmp, tmp, 0b00001000); + code.pshufd(b, a, 0b00001000); + code.punpckldq(tmp, b); + ctx.reg_alloc.DefineValue(code, inst, tmp); } } void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) { - if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) { - EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmullq); - } else if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) { + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + code.vpmullq(xmm_a, xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); + } else if (code.HasHostFeature(HostFeature::SSE41)) { + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); const Xbyak::Reg64 tmp1 = ctx.reg_alloc.ScratchGpr(code); const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code); @@ -2253,29 +2366,28 @@ void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, a); } else { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, 
args[1]); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); - code.movdqa(tmp1, a); - code.movdqa(tmp2, a); - code.movdqa(tmp3, b); + code.movdqa(tmp1, a); + code.movdqa(tmp2, a); + code.movdqa(tmp3, b); - code.psrlq(tmp1, 32); - code.psrlq(tmp3, 32); + code.psrlq(tmp1, 32); + code.psrlq(tmp3, 32); - code.pmuludq(tmp2, b); - code.pmuludq(tmp3, a); - code.pmuludq(b, tmp1); + code.pmuludq(tmp2, b); + code.pmuludq(tmp3, a); + code.pmuludq(b, tmp1); - code.paddq(b, tmp3); - code.psllq(b, 32); - code.paddq(tmp2, b); + code.paddq(b, tmp3); + code.psllq(b, 32); + code.paddq(tmp2, b); - ctx.reg_alloc.DefineValue(code, inst, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); } } @@ -2307,15 +2419,15 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmovwb(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.pand(a, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); @@ -2328,13 +2440,13 @@ void 
EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmovdw(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); if (code.HasHostFeature(HostFeature::SSE41)) { code.pblendw(a, zeros, 0b10101010); @@ -2352,15 +2464,15 @@ void EmitX64::EmitVectorNarrow64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmovqd(result, a); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.shufps(a, zeros, 0b00001000); @@ -2373,13 +2485,13 @@ void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm operand = 
ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const operand = ctx.reg_alloc.UseXmm(code, args[0]); code.vpternlogq(result, operand, operand, u8(~Tern::c)); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqw(xmm_b, xmm_b); code.pxor(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(code, inst, xmm_a); @@ -2393,9 +2505,9 @@ void EmitX64::EmitVectorOr(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); code.movdqa(tmp, xmm_a); @@ -2411,9 +2523,9 @@ void EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); if (code.HasHostFeature(HostFeature::SSSE3)) { @@ -2434,9 +2546,9 @@ void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, 
IR::Inst* inst) { void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.punpcklqdq(xmm_a, xmm_b); if (code.HasHostFeature(HostFeature::SSSE3)) { @@ -2456,10 +2568,10 @@ void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.movdqa(d, b); @@ -2478,17 +2590,17 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); code.phaddw(a, b); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = 
ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.movdqa(d, b); @@ -2508,17 +2620,17 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); code.phaddd(a, b); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); + auto const d = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.movdqa(d, b); @@ -2535,9 +2647,9 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.punpcklqdq(a, b); @@ -2550,8 +2662,8 @@ void 
EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllw(a, 8); @@ -2565,8 +2677,8 @@ void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.pslld(a, 16); @@ -2580,18 +2692,18 @@ void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); code.vpaddq(a, a, c); } else { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllq(a, 32); @@ -2613,8 +2725,8 @@ void 
EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllw(a, 8); @@ -2628,8 +2740,8 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.pslld(a, 16); @@ -2643,8 +2755,8 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* ins void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psllq(a, 32); @@ -2658,14 +2770,10 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins template static void PairedOperation(VectorArray& result, const VectorArray& x, const VectorArray& y, Function fn) { const size_t range = x.size() / 2; - - for (size_t i = 0; i < range; i++) { + for (size_t i = 0; i < range; i++) result[i] = fn(x[2 * i], x[2 * i + 1]); - } - - for (size_t i = 0; i < range; i++) { + for (size_t i = 0; i < range; i++) result[range + i] = fn(y[2 * i], y[2 * i + 1]); - 
} } template @@ -2686,11 +2794,6 @@ static void PairedMax(VectorArray& result, const VectorArray& x, const Vec PairedOperation(result, x, y, [](auto a, auto b) { return (std::max)(a, b); }); } -template -static void PairedMin(VectorArray& result, const VectorArray& x, const VectorArray& y) { - PairedOperation(result, x, y, [](auto a, auto b) { return (std::min)(a, b); }); -} - template static void LowerPairedMax(VectorArray& result, const VectorArray& x, const VectorArray& y) { LowerPairedOperation(result, x, y, [](auto a, auto b) { return (std::max)(a, b); }); @@ -2705,19 +2808,16 @@ template static void EmitVectorPairedMinMax8(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); code.pshufb(x, tmp); code.pshufb(y, tmp); - code.movaps(tmp, x); code.shufps(tmp, y, 0b01'00'01'00); - code.shufps(x, y, 0b11'10'11'10); - if constexpr (std::is_member_function_pointer_v) { (code.*fn)(x, tmp); } else { @@ -2730,21 +2830,17 @@ static void EmitVectorPairedMinMax8(BlockOfCode& code, EmitContext& ctx, IR::Ins template static void EmitVectorPairedMinMaxLower8(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.punpcklqdq(x, y); code.pshufb(x, 
code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); code.movhlps(y, x); code.movq(x, x); - if constexpr (std::is_member_function_pointer_v) { (code.*fn)(x, y); } else { fn(x, y); } - ctx.reg_alloc.DefineValue(code, inst, x); } @@ -2752,9 +2848,9 @@ template static void EmitVectorPairedMinMax16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); // swap idxs 1 and 2 within 64-bit lanes so that both registers contain [even, odd, even, odd]-indexed pairs of elements code.pshuflw(x, x, 0b11'01'10'00); @@ -2780,63 +2876,31 @@ static void EmitVectorPairedMinMax16(BlockOfCode& code, EmitContext& ctx, IR::In ctx.reg_alloc.DefineValue(code, inst, x); } -template -static void EmitVectorPairedMinMaxLower16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements - code.pshuflw(x, x, 0b11'01'10'00); - code.pshuflw(y, y, 0b11'01'10'00); - - // move pairs of even/odd-indexed elements into one register each - - // tmp = x[0, 2], y[0, 2], 0s... - code.movaps(tmp, y); - code.insertps(tmp, x, 0b01001100); - // x = x[1, 3], y[1, 3], 0s... 
- code.insertps(x, y, 0b00011100); - - (code.*fn)(x, tmp); - - ctx.reg_alloc.DefineValue(code, inst, x); -} - -static void EmitVectorPairedMinMaxLower32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - - // tmp = x[1], y[1], 0, 0 - code.movaps(tmp, y); - code.insertps(tmp, x, 0b01001100); - // x = x[0], y[0], 0, 0 - code.insertps(x, y, 0b00011100); - - (code.*fn)(x, tmp); - - ctx.reg_alloc.DefineValue(code, inst, x); -} void EmitX64::EmitVectorPairedMaxS8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.pshufb(x, tmp); + code.pshufb(y, tmp); + code.movaps(tmp, x); + code.shufps(tmp, y, 0b01'00'01'00); + code.shufps(x, y, 0b11'10'11'10); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); - return; - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Max); - }); - return; + code.pmaxsb(x, tmp); + } else { + auto const a = x; + auto const b = tmp; + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, a); + code.pcmpgtb(c, b); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMax(result, a, b); - 
}); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) { @@ -2846,9 +2910,9 @@ void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2866,12 +2930,24 @@ void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxU8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxub); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const constant_00ff = code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF); + code.movdqa(tmp2, constant_00ff); + code.movdqa(tmp3, tmp1); + code.pand(tmp3, tmp2); + code.pand(tmp2, tmp0); + code.packuswb(tmp2, tmp3); + code.psrlw(tmp1, 8); + code.psrlw(tmp0, 8); + code.packuswb(tmp0, tmp1); + code.pmaxub(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) { @@ -2887,9 +2963,9 @@ void 
EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2907,14 +2983,15 @@ void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Min); - }); } else { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMin(result, a, b); - }); + EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& a, const auto& b) { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); + }); } } @@ -2925,9 +3002,9 @@ void EmitX64::EmitVectorPairedMinS16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); 
+ auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2943,12 +3020,25 @@ void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinU8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pminub); - return; + } else { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const constant_00ff = code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF); + code.movdqa(tmp2, tmp1); + code.psrlw(tmp2, 8); + code.movdqa(tmp3, tmp0); + code.psrlw(tmp3, 8); + code.packuswb(tmp3, tmp2); + code.movdqa(tmp2, constant_00ff); + code.pand(tmp1, tmp2); + code.pand(tmp0, tmp2); + code.packuswb(tmp0, tmp1); + code.pminub(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - PairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) { @@ -2964,9 +3054,9 @@ void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, x); code.shufps(tmp, y, 0b10001000); @@ -2982,41 +3072,88 @@ void 
EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMaxLowerS8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb); - return; - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Max); - }); - return; + code.punpcklqdq(x, y); + code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.movhlps(y, x); + code.movq(x, x); + code.pmaxsb(x, y); + } else { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.punpcklqdq(x, y); + code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01)); + code.movhlps(y, x); + code.movq(x, x); + code.movdqa(c, x); + code.pcmpgtb(c, y); + code.pand(x, c); + code.pandn(c, y); + code.por(x, c); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitVectorPairedMaxLowerS16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs 
of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... + code.insertps(x, y, 0b00011100); + code.pmaxsw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 232); + code.pshuflw(tmp1, tmp1, 216); + code.pshufd(tmp0, tmp0, 231); + code.pshuflw(tmp0, tmp0, 114); + code.pmaxsw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerS32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pmaxsd(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); + code.pandn(tmp2, tmp1); + code.por(tmp2, tmp0); + code.movq(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, 
[](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerU8(EmitContext& ctx, IR::Inst* inst) { @@ -3031,63 +3168,143 @@ void EmitX64::EmitVectorPairedMaxLowerU8(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMaxLowerU16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... 
+ code.insertps(x, y, 0b00011100); + code.pmaxuw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 232); + code.pshuflw(tmp1, tmp1, 216); + code.pshufd(tmp0, tmp0, 231); + code.pshuflw(tmp0, tmp0, 114); + code.psubusw(tmp0, tmp1); + code.paddw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMaxLowerU32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pmaxud); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pmaxud(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp0); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp1); + code.pcmpgtd(tmp3, tmp2); + code.pand(tmp0, tmp3); + code.pandn(tmp3, tmp1); + code.por(tmp3, tmp0); + code.movq(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - 
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMax(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { EmitVectorPairedMinMaxLower8(code, ctx, inst, &Xbyak::CodeGenerator::pminsb); - return; - } else if (code.HasHostFeature(HostFeature::SSSE3)) { - EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) { - FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Min); + } else { + EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& a, const auto& b) { + auto const c = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(c, b); + code.pcmpgtb(c, a); + code.pand(a, c); + code.pandn(c, b); + code.por(a, c); }); - return; } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pminsw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... 
+ code.insertps(x, y, 0b00011100); + code.pminsw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 231); + code.pshuflw(tmp1, tmp1, 114); + code.pshufd(tmp0, tmp0, 232); + code.pshuflw(tmp0, tmp0, 216); + code.pminsw(tmp0, tmp1); + code.movq(tmp0, tmp0); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerS32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pminsd); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pminsd(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp1, tmp2); + code.pandn(tmp2, tmp0); + code.por(tmp2, tmp1); + code.movq(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerU8(EmitContext& ctx, IR::Inst* inst) { @@ 
-3102,50 +3319,91 @@ void EmitX64::EmitVectorPairedMinLowerU8(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorPairedMinLowerU16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower16(code, ctx, inst, &Xbyak::CodeGenerator::pminuw); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements + code.pshuflw(x, x, 0b11'01'10'00); + code.pshuflw(y, y, 0b11'01'10'00); + // move pairs of even/odd-indexed elements into one register each + // tmp = x[0, 2], y[0, 2], 0s... + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[1, 3], y[1, 3], 0s... + code.insertps(x, y, 0b00011100); + code.pminuw(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.punpcklwd(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 231); + code.pshuflw(tmp1, tmp1, 114); + code.pshufd(tmp0, tmp0, 232); + code.pshuflw(tmp0, tmp0, 216); + code.movdqa(tmp2, tmp1); + code.psubusw(tmp2, tmp0); + code.psubw(tmp1, tmp2); + code.movq(tmp0, tmp1); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } void EmitX64::EmitVectorPairedMinLowerU32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - EmitVectorPairedMinMaxLower32(code, ctx, inst, &Xbyak::CodeGenerator::pminud); - return; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + 
auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + // tmp = x[1], y[1], 0, 0 + code.movaps(tmp, y); + code.insertps(tmp, x, 0b01001100); + // x = x[0], y[0], 0, 0 + code.insertps(x, y, 0b00011100); + code.pminud(x, tmp); + ctx.reg_alloc.DefineValue(code, inst, x); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.punpckldq(tmp0, tmp1); + code.pshufd(tmp1, tmp0, 238); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp0); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp1); + code.pcmpgtd(tmp3, tmp2); + code.pand(tmp1, tmp3); + code.pandn(tmp3, tmp0); + code.por(tmp3, tmp1); + code.movq(tmp0, tmp3); + ctx.reg_alloc.DefineValue(code, inst, tmp0); } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - LowerPairedMin(result, a, b); - }); } template static D PolynomialMultiply(T lhs, T rhs) { constexpr size_t bit_size = mcl::bitsizeof; const std::bitset operand(lhs); - D res = 0; - for (size_t i = 0; i < bit_size; i++) { - if (operand[i]) { + for (size_t i = 0; i < bit_size; i++) + if (operand[i]) res ^= rhs << i; - } - } - return res; } void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = 
ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const alternate = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr(code).cvt32(); Xbyak::Label loop; @@ -3183,11 +3441,11 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const alternate = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr(code).cvt32(); Xbyak::Label loop; @@ -3229,8 +3487,8 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst void EmitX64::EmitVectorPolynomialMultiplyLong64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::PCLMULQDQ)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.pclmulqdq(xmm_a, xmm_b, 0x00); @@ -3260,7 +3518,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong64(EmitContext& ctx, IR::Inst* ins void 
EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::AVX512BITALG)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpopcntb(data, data); @@ -3271,10 +3529,10 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm low_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm high_a = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const low_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const high_a = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_a, low_a); code.psrlw(high_a, 4); @@ -3303,12 +3561,12 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::GFNI)) { code.gf2p8affineqb(data, code.Const(xword, 0x8040201008040201, 0x8040201008040201), 0); } else { - const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm(code); + auto const high_nibble_reg = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_nibble_reg, code.Const(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.pand(high_nibble_reg, data); code.pxor(data, high_nibble_reg); @@ -3316,7 +3574,7 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { if 
(code.HasHostFeature(HostFeature::SSSE3)) { // High lookup - const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm(code); + auto const high_reversed_reg = ctx.reg_alloc.ScratchXmm(code); code.movdqa(high_reversed_reg, code.Const(xword, 0xE060A020C0408000, 0xF070B030D0509010)); code.pshufb(high_reversed_reg, data); @@ -3350,8 +3608,8 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); @@ -3363,13 +3621,13 @@ void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpshufb(data, data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b)); } else if (code.HasHostFeature(HostFeature::SSSE3)) { code.pshufb(data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); code.psrlw(data, 8); @@ -3382,7 +3640,7 @@ void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b10110001); code.pshufhw(data, data, 0b10110001); ctx.reg_alloc.DefineValue(code, inst, data); @@ -3390,13 +3648,13 @@ void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpshufb(data, data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); } else if (code.HasHostFeature(HostFeature::SSSE3)) { code.pshufb(data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psllw(tmp, 8); code.psrlw(data, 8); @@ -3410,7 +3668,7 @@ void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b00011011); code.pshufhw(data, data, 0b00011011); @@ -3421,7 +3679,7 @@ void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(data, data, 0b01001110); code.pshufhw(data, data, 0b01001110); @@ -3432,8 +3690,8 @@ void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst void 
EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements code.pshufd(temp, data, 0b01'00'11'10); @@ -3453,8 +3711,8 @@ void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; if (code.HasHostFeature(HostFeature::SSSE3)) { code.pxor(temp, temp); @@ -3484,8 +3742,8 @@ void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements(reversed) code.pshufd(temp, data, 0b00'01'10'11); @@ -3508,8 +3766,8 @@ void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm temp = xmm0; + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const temp = xmm0; // Add upper elements to lower elements code.pshufd(temp, data, 0b01'00'11'10); @@ -3524,8 +3782,8 @@ void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) { void 
EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const operand = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); const u8 shift_amount = args[1].GetImmediateU8(); ASSERT(shift_amount % 32 == 0); const u8 shuffle_imm = std::rotr(0b11100100, shift_amount / 32 * 2); @@ -3538,12 +3796,12 @@ void EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: { - const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm(code); + auto const vec_128 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(vec_128, code.Const(xword, 0x8080808080808080, 0x8080808080808080)); code.paddb(a, vec_128); @@ -3553,7 +3811,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I break; } case 16: { - const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm(code); + auto const vec_32768 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(vec_32768, code.Const(xword, 0x8000800080008000, 0x8000800080008000)); code.paddw(a, vec_32768); @@ -3563,7 +3821,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I break; } case 32: { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp1, a); code.por(a, b); @@ -3603,9 +3861,9 @@ static void EmitVectorRoundingHalvingAddUnsigned(size_t esize, 
EmitContext& ctx, case 32: { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp1, a); @@ -3669,18 +3927,18 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I static_assert(esize == 32 || esize == 64); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const b = ctx.reg_alloc.UseXmm(code, args[1]); // positive values of b are left shifts, while negative values are (positive) rounding right shifts // only the lowest byte of each element is read as the shift amount // conveniently, the behavior of bit shifts greater than element width is the same in NEON and SSE/AVX - filled with zeros - const Xbyak::Xmm shift_amount = ctx.reg_alloc.ScratchXmm(code); + auto const shift_amount = ctx.reg_alloc.ScratchXmm(code); code.vpabsb(shift_amount, b); code.vpand(shift_amount, shift_amount, code.BConst(xword, 0xFF)); // if b is positive, do a normal left shift - const Xbyak::Xmm left_shift = ctx.reg_alloc.ScratchXmm(code); + auto const left_shift = ctx.reg_alloc.ScratchXmm(code); ICODE(vpsllv)(left_shift, a, shift_amount); // if b is negative, compute the rounding right shift @@ -3691,7 +3949,7 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I // tmp = (a >> (b - 1)) & 1 // res = (a >> b) + tmp // to add the value of the last bit to be shifted off to the result of the right shift - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); + auto const right_shift 
= ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(xmm0, code.BConst(xword, 1)); // find value of last bit to be shifted off @@ -3775,12 +4033,12 @@ void EmitX64::EmitVectorRoundingShiftLeftU64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pmovsxbw(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.pxor(result, result); code.punpcklbw(result, a); code.psraw(result, 8); @@ -3791,12 +4049,12 @@ void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pmovsxwd(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } else { - const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.pxor(result, result); code.punpcklwd(result, a); code.psrad(result, 16); @@ -3806,12 +4064,12 @@ void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovsxdq(a, a); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); code.movaps(tmp, a); code.psrad(tmp, 31); @@ -3824,7 +4082,7 @@ void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg64 gpr_tmp = ctx.reg_alloc.ScratchGpr(code); code.movq(gpr_tmp, data); @@ -3833,7 +4091,7 @@ void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { code.pinsrq(data, gpr_tmp, 1); } else { - const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const xmm_tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(xmm_tmp, gpr_tmp); code.punpcklqdq(data, xmm_tmp); @@ -3844,9 +4102,9 @@ void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); // only signed 16-bit min/max are available below SSE4.1 if (code.HasHostFeature(HostFeature::SSE41) || esize == 16) { @@ -3912,11 +4170,11 @@ void EmitX64::EmitVectorSignedMultiply16(EmitContext& ctx, IR::Inst* inst) { const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); if (upper_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhw(result, x, y); } else { @@ -3928,7 +4186,7 @@ void EmitX64::EmitVectorSignedMultiply16(EmitContext& ctx, IR::Inst* inst) { } if (lower_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmullw(result, x, y); } else { @@ -3946,9 +4204,9 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (lower_inst && !upper_inst && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(result, x, y); @@ -3957,16 +4215,16 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(lower_result, x, y); ctx.reg_alloc.DefineValue(code, 
lower_inst, lower_result); } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmuldq(result, x, y); code.vpsrlq(x, x, 32); @@ -3978,12 +4236,12 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { return; } - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const sign_correction = ctx.reg_alloc.ScratchXmm(code); + auto const upper_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); // calculate sign correction code.movdqa(tmp, x); @@ -4026,7 +4284,7 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); // SSE absolute value functions return an unsigned result @@ -4038,21 +4296,34 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo // or shift in sign bits to create a mask of (msb == 1 ? 
-1 : 0), then add to the result vector switch (esize) { case 8: { - VectorAbs8(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsb(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubb(temp, data); + code.pminub(data, temp); + } code.pmovmskb(bit, data); - code.pminub(data, code.BConst<8>(xword, 0x7F)); break; } case 16: { - VectorAbs16(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsw(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pxor(temp, temp); + code.psubw(temp, data); + code.pmaxsw(data, temp); + } code.pmovmskb(bit, data); code.and_(bit, 0xAAAA); // toggle mask bits that aren't the msb of an int16 to 0 if (code.HasHostFeature(HostFeature::SSE41)) { code.pminuw(data, code.BConst<16>(xword, 0x7FFF)); } else { - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; code.movdqa(tmp, data); code.psraw(data, 15); code.paddw(data, tmp); @@ -4060,13 +4331,21 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo break; } case 32: { - VectorAbs32(code, ctx, data); + if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pabsd(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(temp, data); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubd(data, temp); + } code.movmskps(bit, data); if (code.HasHostFeature(HostFeature::SSE41)) { code.pminud(data, code.BConst<32>(xword, 0x7FFFFFFF)); } else { - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; code.movdqa(tmp, data); code.psrad(data, 31); code.paddd(data, tmp); @@ -4074,10 +4353,18 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo break; } case 64: { - VectorAbs64(code, ctx, data); + if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { + code.vpabsq(data, data); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(temp, data, 
0b11110101); + code.psrad(temp, 31); + code.pxor(data, temp); + code.psubq(data, temp); + } code.movmskpd(bit, data); - const Xbyak::Xmm tmp = xmm0; + auto const tmp = xmm0; if (code.HasHostFeature(HostFeature::SSE42)) { // create a -1 mask if msb is set code.pxor(tmp, tmp); @@ -4119,13 +4406,13 @@ template static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); code.movdqa(xmm0, y); ctx.reg_alloc.Release(y); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); switch (bit_width) { case 8: @@ -4182,7 +4469,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC switch (bit_width) { case 8: if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqb(tmp2, tmp2); code.pxor(tmp, tmp); code.vpblendvb(xmm0, tmp, tmp2, xmm0); @@ -4262,10 +4549,10 @@ void EmitX64::EmitVectorSignedSaturatedAccumulateUnsigned64(EmitContext& ctx, IR template static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm upper_tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm lower_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = 
ctx.reg_alloc.UseXmm(code, args[1]); + auto const upper_tmp = ctx.reg_alloc.ScratchXmm(code); + auto const lower_tmp = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhw(upper_tmp, x, y); @@ -4284,7 +4571,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC ctx.reg_alloc.Release(x); ctx.reg_alloc.Release(y); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { if constexpr (is_rounding) { @@ -4334,10 +4621,10 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm odds = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm even = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const odds = ctx.reg_alloc.ScratchXmm(code); + auto const even = ctx.reg_alloc.ScratchXmm(code); code.vpmuldq(odds, x, y); code.vpsrlq(x, x, 32); @@ -4350,7 +4637,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpaddq(odds, odds, odds); code.vpaddq(even, even, even); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if constexpr (is_rounding) { code.vmovdqa(result, code.Const(xword, 0x0000000080000000, 0x0000000080000000)); @@ -4361,7 +4648,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpsrlq(result, odds, 32); code.vblendps(result, result, even, 0b1010); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + auto const mask = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Reg32 bit = 
ctx.reg_alloc.ScratchGpr(code).cvt32(); code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); @@ -4376,11 +4663,11 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& return; } - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const sign_correction = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); // calculate sign correction code.movdqa(tmp, x); @@ -4439,8 +4726,8 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHighRounding32(EmitContex void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.punpcklwd(x, x); code.punpcklwd(y, y); @@ -4465,8 +4752,8 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if 
(code.HasHostFeature(HostFeature::AVX)) { code.vpmovsxdq(x, x); @@ -4517,10 +4804,10 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm sign = ctx.reg_alloc.ScratchXmm(code); + auto const src = ctx.reg_alloc.UseXmm(code, args[0]); + auto const dest = ctx.reg_alloc.ScratchXmm(code); + auto const reconstructed = ctx.reg_alloc.ScratchXmm(code); + auto const sign = ctx.reg_alloc.ScratchXmm(code); code.movdqa(dest, src); code.pxor(xmm0, xmm0); @@ -4577,9 +4864,9 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToSigned64(EmitContext& ctx, IR::In static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(code); + auto const src = ctx.reg_alloc.UseXmm(code, args[0]); + auto const dest = ctx.reg_alloc.ScratchXmm(code); + auto const reconstructed = ctx.reg_alloc.ScratchXmm(code); code.movdqa(dest, src); code.pxor(xmm0, xmm0); @@ -4647,9 +4934,9 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned64(EmitContext& ctx, IR:: static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + auto const 
data = ctx.reg_alloc.UseXmm(code, args[0]); + auto const zero = ctx.reg_alloc.ScratchXmm(code); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Address mask = [esize, &code] { switch (esize) { case 8: @@ -4665,7 +4952,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo } }(); - const auto vector_equality = [esize, &code](const Xbyak::Xmm& x, const auto& y) { + const auto vector_equality = [esize, &code](auto const& x, const auto& y) { switch (esize) { case 8: code.pcmpeqb(x, y); @@ -4810,33 +5097,23 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* i EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft); } -template> +template static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray& dst, const VectorArray& data, u8 shift_amount) { + using U = std::make_unsigned_t; /* Per lane: negative input gives 0 and sets QC; a positive input whose shift overflows U gives all-ones and sets QC; otherwise the lane is element << shift_amount. Returns true if any lane saturated. */ static_assert(std::is_signed_v, "T must be signed."); - bool qc_flag = false; for (size_t i = 0; i < dst.size(); i++) { - const T element = data[i]; - const T shift = static_cast(shift_amount); - - if (element == 0) { - dst[i] = 0; - } else if (element < 0) { - dst[i] = 0; - qc_flag = true; - } else { - const U shifted = static_cast(element) << static_cast(shift); - const U shifted_test = shifted >> static_cast(shift); - - if (shifted_test != static_cast(element)) { - dst[i] = static_cast((std::numeric_limits::max)()); - qc_flag = true; - } else { - dst[i] = shifted; - } - } + auto const element = data[i]; + U const shifted = U(U(element) << U(T(shift_amount))); /* narrow back to U: for 8 and 16 bit T the shift is evaluated in int, so the lost high bits only become visible after truncation */ + U const shifted_test = U(shifted >> U(T(shift_amount))); /* round trip differs from the input exactly when bits were shifted out */ + U result = 0; /* U rather than int, so 64 bit lanes are not truncated */ + if (element > 0 && shifted_test != U(element)) + result = T((std::numeric_limits::max)()); + if (element > 0 && shifted_test == U(element)) + result = shifted; + qc_flag |= element < 0 || (element > 0 && shifted_test != U(element)); + dst[i] = result; } - return qc_flag; } @@ -4849,7 +5126,97 @@ void 
EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned16(EmitContext& ctx, IR: } void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned32(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned); + /* Inline emitter: left-shifts four signed 32 bit lanes by the immediate in args[1] with unsigned saturation, and ORs a saturation indicator byte into the fpsr_qc field of the JIT state. */ auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const imm8 = args[1].GetImmediateU8(); + if (code.HasHostFeature(HostFeature::AVX2)) { + auto const tmp_flag = ctx.reg_alloc.ScratchGpr(code); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + if (imm8 == 0) { /* shift by zero: only negative lanes saturate. The shuffles OR all four lanes into lane 0, whose sign bit becomes the flag; vpmaxsd against zero clamps negative lanes to 0. */ + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpshufd(tmp1, tmp0, 85); + code.vpshufd(tmp2, tmp0, 238); + code.vpor(tmp1, tmp1, tmp2); + code.vpshufd(tmp2, tmp0, 255); + code.vpor(tmp2, tmp2, tmp0); + code.vpor(tmp1, tmp1, tmp2); + code.vmovd(tmp_flag.cvt32(), tmp1); + code.shr(tmp_flag.cvt32(), 31); + code.vpxor(tmp1, tmp1, tmp1); + code.vpmaxsd(tmp0, tmp0, tmp1); + } else { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp4 = ctx.reg_alloc.ScratchXmm(code); + auto const cmp_value = u32(1ULL << 31) >> (imm8 - 1); /* equals 2^(32 - imm8), the smallest lane value whose shift no longer fits in 32 bits. NOTE(review): assumes 1 <= imm8 <= 31 on this path - confirm the IR guarantees it. */ + code.vpshufd(tmp1, tmp0, 238); + code.vpor(tmp1, tmp1, tmp0); + code.vpshufd(tmp2, tmp1, 85); + code.vpor(tmp1, tmp1, tmp2); + code.vmovd(tmp_flag.cvt32(), tmp1); + code.cmp(tmp_flag.cvt32(), cmp_value); /* OR of all lanes >= cmp_value (unsigned) iff some lane is negative or too large. The flags set here are read by setae below, so nothing emitted in between may modify EFLAGS. */ + code.vpslld(tmp1, tmp0, imm8); + code.vpbroadcastd(tmp2, code.Const(dword, cmp_value - 2)); + code.vpbroadcastd(tmp3, code.Const(dword, cmp_value - 1)); + code.vpcmpgtd(tmp3, tmp0, tmp3); + code.vpcmpeqd(tmp4, tmp4, tmp4); + code.vpaddd(tmp0, tmp0, tmp4); + code.vpminud(tmp2, tmp0, tmp2); + code.vpcmpeqd(tmp0, tmp0, tmp2); + code.vblendvps(tmp0, tmp3, tmp1, tmp0); /* mask tmp0 is set where 1 <= lane <= cmp_value - 1: those lanes take the shifted value tmp1; all others take tmp3, which is all-ones for lane >= cmp_value and 0 otherwise */ + code.setae(tmp_flag.cvt8()); + } + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp_flag.cvt8()); + 
ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { /* path without AVX2: same algorithm using two-operand SSE forms */ + auto const tmp_flag = ctx.reg_alloc.ScratchGpr(code); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + if (imm8 == 0) { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.pshufd(tmp1, tmp0, 85); + code.pshufd(tmp2, tmp0, 238); + code.por(tmp2, tmp1); + code.pshufd(tmp1, tmp0, 255); + code.por(tmp1, tmp0); + code.por(tmp1, tmp2); + code.movd(tmp_flag.cvt32(), tmp1); + code.shr(tmp_flag.cvt32(), 31); + code.pxor(tmp1, tmp1); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp1); + code.pand(tmp0, tmp2); /* keep only lanes greater than zero; negative lanes become 0 */ + } else { + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + u64 const cmp_value = u64(1ULL << 31) >> (imm8 - 1); + u64 const cmp_one = cmp_value - 1; + u64 const cmp_add = (cmp_value - 2) + 0x80000000; + code.pshufd(tmp1, tmp0, 238); + code.por(tmp1, tmp0); + code.pshufd(tmp2, tmp1, 85); + code.por(tmp2, tmp1); + code.movd(tmp_flag.cvt32(), tmp2); + code.cmp(tmp_flag.cvt32(), cmp_value); + code.movdqa(tmp1, tmp0); + code.pslld(tmp1, imm8); + code.movdqa(tmp2, tmp0); + code.pcmpgtd(tmp2, code.Const(xword, cmp_one | (cmp_one << 32), cmp_one | (cmp_one << 32))); + code.pcmpeqd(tmp3, tmp3); + code.paddd(tmp0, tmp3); + code.pxor(tmp0, code.Const(xword, 0x80000000'80000000, 0x80000000'80000000)); + code.pcmpgtd(tmp0, code.Const(xword, cmp_add | (cmp_add << 32), cmp_add | (cmp_add << 32))); /* unsigned (lane - 1) > (cmp_value - 2), done as a signed compare after flipping the sign bit on both sides; tmp0 is now the out-of-range mask */ + code.pand(tmp2, tmp0); + code.pandn(tmp0, tmp1); + code.por(tmp0, tmp2); + code.setae(tmp_flag.cvt8()); + } + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp_flag.cvt8()); + ctx.reg_alloc.DefineValue(code, inst, tmp0); +// EmitTwoArgumentFallbackWithSaturationAndImmediate(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned); + } } void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned64(EmitContext& ctx, 
IR::Inst* inst) { @@ -4887,7 +5254,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { const bool is_defaults_zero = inst->GetArg(0).IsZero(); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI)) { - const Xbyak::Xmm indicies = table_size <= 2 ? ctx.reg_alloc.UseXmm(code, args[2]) : ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = table_size <= 2 ? ctx.reg_alloc.UseXmm(code, args[2]) : ctx.reg_alloc.UseScratchXmm(code, args[2]); const u64 index_count = mcl::bit::replicate_element(static_cast(table_size * 8)); @@ -4895,43 +5262,43 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { switch (table_size) { case 1: { - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 2: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm0); ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 3: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[2]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm0, xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); @@ -4939,17 +5306,17 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { break; } case 4: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); code.vpunpcklqdq(xmm_table1, xmm_table1, xmm_table1_upper); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm0, 
xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); @@ -4972,9 +5339,9 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { }; if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.xorps(result, result); code.movsd(result, xmm_table0); @@ -4986,9 +5353,9 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); @@ -4999,12 +5366,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41) && table_size <= 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm defaults = 
ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); if (table_size == 2) { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } @@ -5023,12 +5390,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41) && is_defaults_zero) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } @@ -5037,7 +5404,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { code.punpcklqdq(xmm_table1, xmm0); } else { ASSERT(table_size == 4); - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.punpcklqdq(xmm_table1, xmm_table1_upper); ctx.reg_alloc.Release(xmm_table1_upper); } @@ -5058,18 +5425,18 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm indicies = 
ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } if (table_size == 4) { - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.punpcklqdq(xmm_table1, xmm_table1_upper); ctx.reg_alloc.Release(xmm_table1_upper); } @@ -5098,37 +5465,31 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { const u32 stack_space = static_cast(6 * 8); ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(code, table[i]); + auto const table_value = ctx.reg_alloc.UseXmm(code, table[i]); code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value); ctx.reg_alloc.Release(table_value); } - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); - code.lea(code.ABI_PARAM1, 
ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 4 * 8]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 5 * 8]); code.mov(code.ABI_PARAM4.cvt32(), table_size); code.movq(qword[code.ABI_PARAM2], defaults); code.movq(qword[code.ABI_PARAM3], indicies); - - code.CallLambda( - [](const HalfVectorArray* table, HalfVectorArray& result, const HalfVectorArray& indicies, size_t table_size) { - for (size_t i = 0; i < result.size(); ++i) { - const size_t index = indicies[i] / table[0].size(); - const size_t elem = indicies[i] % table[0].size(); - if (index < table_size) { - result[i] = table[index][elem]; - } - } - }); - + code.CallLambda([](const HalfVectorArray* table, HalfVectorArray& result, const HalfVectorArray& indicies, size_t table_size) { + for (size_t i = 0; i < result.size(); ++i) { + const size_t index = indicies[i] / table[0].size(); + const size_t elem = indicies[i] % table[0].size(); + if (index < table_size) + result[i] = table[index][elem]; + } + }); code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]); ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(code, inst, result); } @@ -5142,14 +5503,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector; if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 4) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); code.vpcmpub(k2, indicies, code.BConst<8>(xword, 4 * 16), CmpInt::LessThan); // Handle vector-table 0,1 - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, 
table[1]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpermi2b(indicies | k1, xmm_table0, xmm_table1); @@ -5157,8 +5518,8 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.Release(xmm_table1); // Handle vector-table 2,3 - const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); - const Xbyak::Xmm xmm_table3 = ctx.reg_alloc.UseXmm(code, table[3]); + auto const xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table3 = ctx.reg_alloc.UseXmm(code, table[3]); code.kandnw(k1, k1, k2); code.vpermi2b(indicies | k1, xmm_table2, xmm_table3); @@ -5167,19 +5528,19 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vmovdqu8(indicies | k2 | T_z, indicies); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vmovdqu8(defaults | k2, indicies); ctx.reg_alloc.DefineValue(code, inst, defaults); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 3) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); code.vpcmpub(k2, indicies, code.BConst<8>(xword, 3 * 16), CmpInt::LessThan); // Handle vector-table 0,1 - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpermi2b(indicies | k1, xmm_table0, xmm_table1); @@ -5187,7 +5548,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* 
inst) { ctx.reg_alloc.Release(xmm_table1); // Handle vector-table 2 - const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); + auto const xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); code.kandnw(k1, k1, k2); code.vpermb(indicies | k1, indicies, xmm_table2); @@ -5196,14 +5557,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vmovdqu8(indicies | k2 | T_z, indicies); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vmovdqu8(defaults | k2, indicies); ctx.reg_alloc.DefineValue(code, inst, defaults); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); @@ -5211,36 +5572,36 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.vpermi2b(indicies | k1 | T_z, xmm_table0, xmm_table1); ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm_table0, xmm_table1); code.vmovdqu8(result | k1, indicies); ctx.reg_alloc.DefineValue(code, inst, result); } } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 1) { - const Xbyak::Xmm indicies = 
ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 1 * 16), CmpInt::LessThan); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm_table0); ctx.reg_alloc.DefineValue(code, inst, result); } } else if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); code.pshufb(xmm_table0, indicies); ctx.reg_alloc.DefineValue(code, inst, xmm_table0); } else if (code.HasHostFeature(HostFeature::SSE41) && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpaddusb(xmm0, indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); @@ -5253,9 +5614,9 @@ void 
EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_table0); } else if (code.HasHostFeature(HostFeature::SSE41) && is_defaults_zero && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[1]); + auto const indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + auto const xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + auto const xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[1]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpaddusb(xmm0, indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); @@ -5271,14 +5632,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } else if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const masked = ctx.reg_alloc.ScratchXmm(code); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); + auto const xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); const Xbyak::Opmask table_mask = k1; const u64 table_index = mcl::bit::replicate_element(i * 16); @@ -5295,15 +5656,15 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::SSE41)) { - const 
Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(code); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const masked = ctx.reg_alloc.ScratchXmm(code); code.movaps(masked, code.Const(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.pand(masked, indicies); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); + auto const xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); const u64 table_index = mcl::bit::replicate_element(i * 16); @@ -5327,13 +5688,13 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { const u32 stack_space = static_cast((table_size + 2) * 16); ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(code, table[i]); + auto const table_value = ctx.reg_alloc.UseXmm(code, table[i]); code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value); ctx.reg_alloc.Release(table_value); } - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const defaults = ctx.reg_alloc.UseXmm(code, args[0]); + auto const indicies = ctx.reg_alloc.UseXmm(code, args[2]); + auto const result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(code, nullptr); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); @@ -5360,8 +5721,8 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); if (!part) { @@ -5379,8 +5740,8 @@ void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); if (!part) { @@ -5398,8 +5759,8 @@ void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); code.shufps(lower, upper, !part ? 0b10001000 : 0b11011101); @@ -5411,8 +5772,8 @@ void EmitX64::EmitVectorTranspose32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorTranspose64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(code, args[1]); + auto const lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const upper = ctx.reg_alloc.UseXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); code.shufpd(lower, upper, !part ? 
0b00 : 0b11); @@ -5420,89 +5781,87 @@ void EmitX64::EmitVectorTranspose64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, inst, lower); } -static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { + +void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); - - switch (esize) { - case 8: { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - + if (code.HasHostFeature(HostFeature::AVX)) { /* absolute difference of unsigned bytes computed as max(a, b) - min(a, b) */ + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminub(tmp2, tmp0, tmp1); + code.vpmaxub(tmp0, tmp0, tmp1); + code.vpsubb(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(temp, x); code.psubusb(temp, y); code.psubusb(y, x); code.por(temp, y); /* of the two saturating differences x - y and y - x at least one is zero, so their OR is the absolute difference */ - break; + ctx.reg_alloc.DefineValue(code, inst, temp); } - case 16: { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); +} +void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasHostFeature(HostFeature::AVX)) { /* same max - min scheme as the 8 bit variant, on unsigned 16 bit lanes */ + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminuw(tmp2, tmp0, tmp1); + code.vpmaxuw(tmp0, tmp0, tmp1); + code.vpsubw(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else { + auto 
const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(temp, x); code.psubusw(temp, y); code.psubusw(y, x); code.por(temp, y); /* OR of the two saturating 16 bit differences, at least one of which is zero */ - break; + ctx.reg_alloc.DefineValue(code, inst, temp); } - case 32: - // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 - if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - - code.movdqa(temp, x); - code.pminud(x, y); - code.pmaxud(temp, y); - code.psubd(temp, x); - } else { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - if (ctx.HasOptimization(OptimizationFlag::CodeSpeed)) { - // About 45 bytes - const Xbyak::Xmm temp_x = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm temp_y = ctx.reg_alloc.ScratchXmm(code); - code.pcmpeqd(temp, temp); - code.pslld(temp, 31); - code.movdqa(temp_x, x); - code.movdqa(temp_y, y); - code.paddd(temp_x, x); - code.paddd(temp_y, y); - code.pcmpgtd(temp_y, temp_x); - code.psubd(x, y); - code.pandn(temp, temp_y); - code.pxor(x, y); - code.psubd(x, y); - } else { - // Smaller code size - about 36 bytes - code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); - code.pxor(x, temp); - code.pxor(y, temp); - code.movdqa(temp, x); - code.psubd(temp, y); - code.pcmpgtd(y, x); - code.psrld(y, 1); - code.pxor(temp, y); - code.psubd(temp, y); - } - } - break; - } - - ctx.reg_alloc.DefineValue(code, inst, temp); -} - -void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { - EmitVectorUnsignedAbsoluteDifference(8, ctx, inst, code); -} - -void EmitX64::EmitVectorUnsignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) { - 
EmitVectorUnsignedAbsoluteDifference(16, ctx, inst, code); } void EmitX64::EmitVectorUnsignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) { - EmitVectorUnsignedAbsoluteDifference(32, ctx, inst, code); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasHostFeature(HostFeature::AVX)) { /* unsigned 32 bit lanes: max(a, b) - min(a, b) with three-operand AVX forms */ + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + code.vpminud(tmp2, tmp0, tmp1); + code.vpmaxud(tmp0, tmp0, tmp1); + code.vpsubd(tmp0, tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else if (code.HasHostFeature(HostFeature::SSE41)) { + // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 + auto const temp = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + code.movdqa(temp, x); + code.pminud(x, y); + code.pmaxud(temp, y); + code.psubd(temp, x); + ctx.reg_alloc.DefineValue(code, inst, temp); + } else { + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp2 = ctx.reg_alloc.ScratchXmm(code); + auto const tmp3 = ctx.reg_alloc.ScratchXmm(code); + code.movdqa(tmp2, code.Const(xword, 0x8000'00008000'0000, 0x8000'00008000'0000)); + code.movdqa(tmp3, tmp1); + code.pxor(tmp3, tmp2); + code.pxor(tmp2, tmp0); + code.pcmpgtd(tmp2, tmp3); /* tmp2 = all-ones where args[0] > args[1] as unsigned, via signed compare after flipping both sign bits */ + code.psubd(tmp0, tmp1); + code.pxor(tmp0, tmp2); + code.psubd(tmp2, tmp0); /* mask - ((a - b) ^ mask) gives a - b where the mask is set and b - a where it is clear */ + //code.movdqa(tmp0, tmp2); + ctx.reg_alloc.DefineValue(code, inst, tmp2); + } } void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { @@ -5510,11 +5869,11 @@ void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); auto args 
= ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); if (upper_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhuw(result, x, y); } else { @@ -5526,7 +5885,7 @@ void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { } if (lower_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmullw(result, x, y); } else { @@ -5544,24 +5903,24 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (lower_inst && !upper_inst && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const x = ctx.reg_alloc.UseXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseXmm(code, args[1]); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(result, x, y); ctx.reg_alloc.DefineValue(code, lower_inst, result); } else if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); + auto const lower_result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(lower_result, x, y); ctx.reg_alloc.DefineValue(code, lower_inst, lower_result); } - 
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + auto const result = ctx.reg_alloc.ScratchXmm(code); code.vpmuludq(result, x, y); code.vpsrlq(x, x, 32); @@ -5571,11 +5930,11 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(code, upper_inst, result); } else { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - const Xbyak::Xmm upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; - const Xbyak::Xmm lower_result = lower_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; + auto const x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + auto const tmp = ctx.reg_alloc.ScratchXmm(code); + auto const upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; + auto const lower_result = lower_inst ? 
ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; // calculate unsigned multiply code.movdqa(tmp, x); @@ -5792,11 +6151,11 @@ void EmitX64::EmitVectorUnsignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxbw(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklbw(a, zeros); } @@ -5805,11 +6164,11 @@ void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxwd(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklwd(a, zeros); } @@ -5818,11 +6177,11 @@ void EmitX64::EmitVectorZeroExtend16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroExtend32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxdq(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpckldq(a, zeros); } @@ -5831,8 +6190,8 @@ void EmitX64::EmitVectorZeroExtend32(EmitContext& ctx, IR::Inst* inst) { void 
EmitX64::EmitVectorZeroExtend64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklqdq(a, zeros); ctx.reg_alloc.DefineValue(code, inst, a); @@ -5840,7 +6199,7 @@ void EmitX64::EmitVectorZeroExtend64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.movq(a, a); // TODO: !IsLastUse @@ -5848,7 +6207,7 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Xmm a = ctx.reg_alloc.ScratchXmm(code); + auto const a = ctx.reg_alloc.ScratchXmm(code); code.pxor(a, a); ctx.reg_alloc.DefineValue(code, inst, a); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index 70edfbd0bc..046ecc78d6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -24,6 +24,7 @@ #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/info.h" #include "dynarmic/common/fp/op.h" +#include "dynarmic/common/fp/rounding_mode.h" #include "dynarmic/common/fp/util.h" #include "dynarmic/interface/optimization_flags.h" #include "dynarmic/ir/basic_block.h" @@ -93,7 +94,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std:: code.cmp(bitmask, 0); } - SharedLabel end = GenSharedLabel(), nan = GenSharedLabel(); + 
SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel(); code.jnz(*nan, code.T_NEAR); code.L(*end); @@ -188,23 +189,6 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) { } } -template -void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) { - const Xbyak::Xmm nan_mask = xmm0; - if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { - constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, - FpFixup::PosZero); - FCODE(vfixupimmp)(result, result, code.BConst<32>(ptr_b, nan_to_zero), u8(0)); - } else if (code.HasHostFeature(HostFeature::AVX)) { - FCODE(vcmpordp)(nan_mask, result, result); - FCODE(vandp)(result, result, nan_mask); - } else { - code.movaps(nan_mask, result); - FCODE(cmpordp)(nan_mask, nan_mask); - code.andps(result, nan_mask); - } -} - template void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list to_daz, Xbyak::Xmm tmp) { if (fpcr.FZ()) { @@ -1330,7 +1314,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.movaps(result, xmm_a); @@ -1603,7 +1587,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.movaps(result, GetVectorOf(code)); @@ -1776,7 +1760,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); 
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code); - SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel(); + SharedLabel bad_values = ctx.GenSharedLabel(), end = ctx.GenSharedLabel(); code.movaps(value, operand); @@ -1867,7 +1851,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); - SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); + SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.vmovaps(result, GetVectorOf(code)); @@ -2004,120 +1988,123 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) { template void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const size_t fbits = inst->GetArg(1).GetU8(); - const auto rounding = static_cast(inst->GetArg(2).GetU8()); + const auto rounding = FP::RoundingMode(inst->GetArg(2).GetU8()); [[maybe_unused]] const bool fpcr_controlled = inst->GetArg(3).GetU1(); - if constexpr (fsize != 16) { - if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]); - - MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { - const int round_imm = [&] { - switch (rounding) { - case FP::RoundingMode::ToNearest_TieEven: - default: - return 0b00; - case FP::RoundingMode::TowardsPlusInfinity: - return 0b10; - case FP::RoundingMode::TowardsMinusInfinity: - return 0b01; - case FP::RoundingMode::TowardsZero: - return 0b11; - } - }(); - - const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) { - // MSVC doesn't allow us to use a [&] capture, so we have to do this instead. 
- (void)ctx; - - if constexpr (fsize == 32) { - code.cvttps2dq(src, src); - } else { - if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { - code.vcvttpd2qq(src, src); - } else { - const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code); - const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code); - - code.cvttsd2si(lo, src); - code.punpckhqdq(src, src); - code.cvttsd2si(hi, src); - code.movq(src, lo); - code.pinsrq(src, hi, 1); - - ctx.reg_alloc.Release(hi); - ctx.reg_alloc.Release(lo); - } - } - }; - - if (fbits != 0) { - const u64 scale_factor = fsize == 32 - ? static_cast(fbits + 127) << 23 - : static_cast(fbits + 1023) << 52; - FCODE(mulp)(src, GetVectorOf(code, scale_factor)); + if (code.HasHostFeature(HostFeature::SSE41) && fsize != 16 && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]); + MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { + const int round_imm = [&] { + switch (rounding) { + case FP::RoundingMode::ToNearest_TieEven: + default: + return 0b00; + case FP::RoundingMode::TowardsPlusInfinity: + return 0b10; + case FP::RoundingMode::TowardsMinusInfinity: + return 0b01; + case FP::RoundingMode::TowardsZero: + return 0b11; } + }(); + const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) { + // MSVC doesn't allow us to use a [&] capture, so we have to do this instead. + (void)ctx; - FCODE(roundp)(src, src, static_cast(round_imm)); - ZeroIfNaN(code, src); - - constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000; - [[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 
0x4f800000 : 0x43f0000000000000; - - if constexpr (unsigned_) { + if constexpr (fsize == 32) { + code.cvttps2dq(src, src); + } else { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { - // Mask positive values - code.xorps(xmm0, xmm0); - FCODE(vcmpp)(k1, src, xmm0, Cmp::GreaterEqual_OQ); - - // Convert positive values to unsigned integers, write 0 anywhere else - // vcvttp*2u*q already saturates out-of-range values to (0xFFFF...) - if constexpr (fsize == 32) { - code.vcvttps2udq(src | k1 | T_z, src); - } else { - code.vcvttpd2uqq(src | k1 | T_z, src); - } + code.vcvttpd2qq(src, src); } else { - // Zero is minimum - code.xorps(xmm0, xmm0); - FCODE(cmplep)(xmm0, src); - FCODE(andp)(src, xmm0); + const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code); - // Will we exceed unsigned range? - const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code); - code.movaps(exceed_unsigned, GetVectorOf(code)); - FCODE(cmplep)(exceed_unsigned, src); + code.cvttsd2si(lo, src); + code.punpckhqdq(src, src); + code.cvttsd2si(hi, src); + code.movq(src, lo); + code.pinsrq(src, hi, 1); - // Will be exceed signed range? - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); - code.movaps(tmp, GetVectorOf(code)); - code.movaps(xmm0, tmp); - FCODE(cmplep)(xmm0, src); - FCODE(andp)(tmp, xmm0); - FCODE(subp)(src, tmp); - perform_conversion(src); - ICODE(psll)(xmm0, u8(fsize - 1)); - FCODE(orp)(src, xmm0); + ctx.reg_alloc.Release(hi); + ctx.reg_alloc.Release(lo); + } + } + }; + if (fbits != 0) { + const u64 scale_factor = fsize == 32 + ? 
u64(fbits + 127) << 23 + : u64(fbits + 1023) << 52; + FCODE(mulp)(src, GetVectorOf(code, scale_factor)); + } - // Saturate to max - FCODE(orp)(src, exceed_unsigned); + FCODE(roundp)(src, src, u8(round_imm)); + const Xbyak::Xmm nan_mask = xmm0; + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + static constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, FpFixup::PosZero); + FCODE(vfixupimmp)(src, src, code.BConst<32>(ptr_b, nan_to_zero), u8(0)); + } else if (code.HasHostFeature(HostFeature::AVX)) { + FCODE(vcmpordp)(nan_mask, src, src); + FCODE(vandp)(src, src, nan_mask); + } else { + code.movaps(nan_mask, src); + FCODE(cmpordp)(nan_mask, nan_mask); + code.andps(src, nan_mask); + } + + constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000; + [[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000; + + if constexpr (unsigned_) { + if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { + // Mask positive values + code.xorps(xmm0, xmm0); + FCODE(vcmpp)(k1, src, xmm0, Cmp::GreaterEqual_OQ); + + // Convert positive values to unsigned integers, write 0 anywhere else + // vcvttp*2u*q already saturates out-of-range values to (0xFFFF...) + if (fsize == 32) { + code.vcvttps2udq(src | k1 | T_z, src); + } else { + code.vcvttpd2uqq(src | k1 | T_z, src); } } else { - using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC - constexpr u64 integer_max = FPT((std::numeric_limits>>::max)()); - - code.movaps(xmm0, GetVectorOf(code)); + // Zero is minimum + code.xorps(xmm0, xmm0); FCODE(cmplep)(xmm0, src); - perform_conversion(src); - FCODE(blendvp)(src, GetVectorOf(code)); - } - }); + FCODE(andp)(src, xmm0); - ctx.reg_alloc.DefineValue(code, inst, src); - return; - } + // Will we exceed unsigned range? 
+ const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code); + code.movaps(exceed_unsigned, GetVectorOf(code)); + FCODE(cmplep)(exceed_unsigned, src); + + // Will be exceed signed range? + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + code.movaps(tmp, GetVectorOf(code)); + code.movaps(xmm0, tmp); + FCODE(cmplep)(xmm0, src); + FCODE(andp)(tmp, xmm0); + FCODE(subp)(src, tmp); + perform_conversion(src); + ICODE(psll)(xmm0, u8(fsize - 1)); + FCODE(orp)(src, xmm0); + + // Saturate to max + FCODE(orp)(src, exceed_unsigned); + } + } else { + using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC + constexpr u64 integer_max = FPT((std::numeric_limits>>::max)()); + code.movaps(xmm0, GetVectorOf(code)); + FCODE(cmplep)(xmm0, src); + perform_conversion(src); + FCODE(blendvp)(src, GetVectorOf(code)); + } + }); + ctx.reg_alloc.DefineValue(code, inst, src); + return; } using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC diff --git a/src/dynarmic/src/dynarmic/backend/x64/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/x64/exception_handler_windows.cpp index 3ae553bccd..bae397ff2b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/exception_handler_windows.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. 
@@ -176,7 +176,7 @@ struct ExceptionHandler::Impl final { code.align(16); const u8* exception_handler_without_cb = code.getCurr(); - code.mov(code.eax, static_cast(ExceptionContinueSearch)); + code.mov(code.eax, u32(ExceptionContinueSearch)); code.ret(); code.align(16); @@ -192,20 +192,18 @@ struct ExceptionHandler::Impl final { code.lea(code.rsp, code.ptr[code.rsp - 8]); code.mov(code.ABI_PARAM1, std::bit_cast(&cb)); code.mov(code.ABI_PARAM2, code.ABI_PARAM3); - code.CallLambda( - [](const std::function& cb_, PCONTEXT ctx) { - FakeCall fc = cb_(ctx->Rip); - - ctx->Rsp -= sizeof(u64); - *std::bit_cast(ctx->Rsp) = fc.ret_rip; - ctx->Rip = fc.call_rip; - }); + code.CallLambda([](const std::function& cb_, PCONTEXT ctx) { + FakeCall fc = cb_(ctx->Rip); + ctx->Rsp -= sizeof(u64); + *std::bit_cast(ctx->Rsp) = fc.ret_rip; + ctx->Rip = fc.call_rip; + }); code.add(code.rsp, 8); - code.mov(code.eax, static_cast(ExceptionContinueExecution)); + code.mov(code.eax, u32(ExceptionContinueExecution)); code.ret(); - exception_handler_without_cb_offset = static_cast(exception_handler_without_cb - code.getCode()); - exception_handler_with_cb_offset = static_cast(exception_handler_with_cb - code.getCode()); + exception_handler_without_cb_offset = ULONG(exception_handler_without_cb - code.getCode()); + exception_handler_with_cb_offset = ULONG(exception_handler_with_cb - code.getCode()); code.align(16); UNWIND_INFO* unwind_info = static_cast(code.AllocateFromCodeSpace(sizeof(UNWIND_INFO))); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 5c5ed25131..2cfa14ae18 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -417,7 +417,8 @@ HostLoc RegAlloc::SelectARegister(std::bitset<32> desired_locations) const noexc // While R13 and R14 are technically available, we avoid allocating for them // at all costs, because theoretically skipping them is 
better than spilling // all over the place - i also fixes bugs with high reg pressure - } else if (i >= HostLoc::R13 && i <= HostLoc::R15) { + // %rbp must not be trashed, so skip it as well + } else if (i == HostLoc::RBP || (i >= HostLoc::R13 && i <= HostLoc::R15)) { // skip, do not touch // Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL) } else if (loc_info.IsEmpty()) { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 8b872a0e9c..746d6b723f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -50,7 +50,7 @@ public: } inline void ReadLock() noexcept { ASSERT(size_t(is_being_used_count) + 1 < (std::numeric_limits::max)()); - ASSERT(!is_scratch); + ASSERT(!bool(is_scratch)); is_being_used_count++; } inline void WriteLock() noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/stack_layout.h b/src/dynarmic/src/dynarmic/backend/x64/stack_layout.h index 50737f12eb..43a3fc7ab2 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/stack_layout.h +++ b/src/dynarmic/src/dynarmic/backend/x64/stack_layout.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. 
@@ -22,14 +22,13 @@ constexpr size_t SpillCount = 64; #endif struct alignas(16) StackLayout { + // Needs alignment for VMOV and XMM spills + alignas(16) std::array, SpillCount> spill; s64 cycles_remaining; s64 cycles_to_run; - - std::array, SpillCount> spill; - u32 save_host_MXCSR; - bool check_bit; + u64 abi_base_pointer; }; #ifdef _MSC_VER diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index bbf128d797..1e3d368187 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -36,25 +36,19 @@ inline size_t ToFastLookupIndexArm(u32 instruction) noexcept { } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() noexcept { - std::vector> list = { +static ArmDecodeTable GetArmDecodeTable() noexcept { + ArmDecodeTable table{}; + for (size_t i = 0; i < table.size(); ++i) { + // PLEASE HEAP ELLIDE + for (auto const& e : std::vector>{ #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" #undef INST - }; - - // If a matcher has more bits in its mask it is more specific, so it should come first. 
- std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); - }); - - ArmDecodeTable table{}; - for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndexArm(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndexArm(matcher.GetMask()); + }) { + auto const expect = detail::ToFastLookupIndexArm(e.GetExpected()); + auto const mask = detail::ToFastLookupIndexArm(e.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e); } } } @@ -62,7 +56,7 @@ constexpr ArmDecodeTable GetArmDecodeTable() noexcept { } template -std::optional>> DecodeArm(u32 instruction) noexcept { +static std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); @@ -73,7 +67,7 @@ std::optional>> DecodeArm(u32 instruc } template -std::optional GetNameARM(u32 inst) noexcept { +static std::optional GetNameARM(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./arm.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.inc b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.inc index 90995c4c05..07f7ce0154 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.inc +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.inc @@ -1,316 +1,265 @@ -// Barrier instructions -INST(arm_DMB, "DMB", "1111010101111111111100000101oooo") // v7 -INST(arm_DSB, "DSB", "1111010101111111111100000100oooo") // v7 -INST(arm_ISB, "ISB", "1111010101111111111100000110oooo") // v7 +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// 
SPDX-License-Identifier: GPL-3.0-or-later +// DO NOT REORDER -// Branch instructions -INST(arm_BLX_imm, "BLX (imm)", "1111101hvvvvvvvvvvvvvvvvvvvvvvvv") // v5 -INST(arm_BLX_reg, "BLX (reg)", "cccc000100101111111111110011mmmm") // v5 -INST(arm_B, "B", "cccc1010vvvvvvvvvvvvvvvvvvvvvvvv") // v1 -INST(arm_BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") // v1 -INST(arm_BX, "BX", "cccc000100101111111111110001mmmm") // v4T -INST(arm_BXJ, "BXJ", "cccc000100101111111111110010mmmm") // v5J - -// CRC32 instructions -INST(arm_CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") // v8 -INST(arm_CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") // v8 - -// Coprocessor instructions -INST(arm_CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2 (CDP2: v5) -INST(arm_LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2 (LDC2: v5) -INST(arm_MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") // v2 (MCR2: v5) -INST(arm_MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6) -INST(arm_MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") // v2 (MRC2: v5) -INST(arm_MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6) -INST(arm_STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2 (STC2: v5) - -// Data Processing instructions -INST(arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_ADC_reg, "ADC (reg)", "cccc0000101Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_ADC_rsr, "ADC (rsr)", "cccc0000101Snnnnddddssss0rr1mmmm") // v1 -INST(arm_ADD_imm, "ADD (imm)", "cccc0010100Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_ADD_reg, "ADD (reg)", "cccc0000100Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_ADD_rsr, "ADD (rsr)", "cccc0000100Snnnnddddssss0rr1mmmm") // v1 -INST(arm_AND_imm, "AND (imm)", "cccc0010000Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_AND_reg, "AND (reg)", "cccc0000000Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_AND_rsr, "AND (rsr)", "cccc0000000Snnnnddddssss0rr1mmmm") // v1 -INST(arm_BIC_imm, "BIC (imm)", "cccc0011110Snnnnddddrrrrvvvvvvvv") // v1 
-INST(arm_BIC_reg, "BIC (reg)", "cccc0001110Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_BIC_rsr, "BIC (rsr)", "cccc0001110Snnnnddddssss0rr1mmmm") // v1 -INST(arm_CMN_imm, "CMN (imm)", "cccc00110111nnnn0000rrrrvvvvvvvv") // v1 -INST(arm_CMN_reg, "CMN (reg)", "cccc00010111nnnn0000vvvvvrr0mmmm") // v1 -INST(arm_CMN_rsr, "CMN (rsr)", "cccc00010111nnnn0000ssss0rr1mmmm") // v1 -INST(arm_CMP_imm, "CMP (imm)", "cccc00110101nnnn0000rrrrvvvvvvvv") // v1 -INST(arm_CMP_reg, "CMP (reg)", "cccc00010101nnnn0000vvvvvrr0mmmm") // v1 -INST(arm_CMP_rsr, "CMP (rsr)", "cccc00010101nnnn0000ssss0rr1mmmm") // v1 -INST(arm_EOR_imm, "EOR (imm)", "cccc0010001Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_EOR_reg, "EOR (reg)", "cccc0000001Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_EOR_rsr, "EOR (rsr)", "cccc0000001Snnnnddddssss0rr1mmmm") // v1 -INST(arm_MOV_imm, "MOV (imm)", "cccc0011101S0000ddddrrrrvvvvvvvv") // v1 -INST(arm_MOV_reg, "MOV (reg)", "cccc0001101S0000ddddvvvvvrr0mmmm") // v1 -INST(arm_MOV_rsr, "MOV (rsr)", "cccc0001101S0000ddddssss0rr1mmmm") // v1 -INST(arm_MVN_imm, "MVN (imm)", "cccc0011111S0000ddddrrrrvvvvvvvv") // v1 -INST(arm_MVN_reg, "MVN (reg)", "cccc0001111S0000ddddvvvvvrr0mmmm") // v1 -INST(arm_MVN_rsr, "MVN (rsr)", "cccc0001111S0000ddddssss0rr1mmmm") // v1 -INST(arm_ORR_imm, "ORR (imm)", "cccc0011100Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_ORR_reg, "ORR (reg)", "cccc0001100Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_ORR_rsr, "ORR (rsr)", "cccc0001100Snnnnddddssss0rr1mmmm") // v1 -INST(arm_RSB_imm, "RSB (imm)", "cccc0010011Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_RSB_reg, "RSB (reg)", "cccc0000011Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_RSB_rsr, "RSB (rsr)", "cccc0000011Snnnnddddssss0rr1mmmm") // v1 -INST(arm_RSC_imm, "RSC (imm)", "cccc0010111Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_RSC_reg, "RSC (reg)", "cccc0000111Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_RSC_rsr, "RSC (rsr)", "cccc0000111Snnnnddddssss0rr1mmmm") // v1 -INST(arm_SBC_imm, "SBC (imm)", "cccc0010110Snnnnddddrrrrvvvvvvvv") // v1 
-INST(arm_SBC_reg, "SBC (reg)", "cccc0000110Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_SBC_rsr, "SBC (rsr)", "cccc0000110Snnnnddddssss0rr1mmmm") // v1 -INST(arm_SUB_imm, "SUB (imm)", "cccc0010010Snnnnddddrrrrvvvvvvvv") // v1 -INST(arm_SUB_reg, "SUB (reg)", "cccc0000010Snnnnddddvvvvvrr0mmmm") // v1 -INST(arm_SUB_rsr, "SUB (rsr)", "cccc0000010Snnnnddddssss0rr1mmmm") // v1 -INST(arm_TEQ_imm, "TEQ (imm)", "cccc00110011nnnn0000rrrrvvvvvvvv") // v1 -INST(arm_TEQ_reg, "TEQ (reg)", "cccc00010011nnnn0000vvvvvrr0mmmm") // v1 -INST(arm_TEQ_rsr, "TEQ (rsr)", "cccc00010011nnnn0000ssss0rr1mmmm") // v1 -INST(arm_TST_imm, "TST (imm)", "cccc00110001nnnn0000rrrrvvvvvvvv") // v1 -INST(arm_TST_reg, "TST (reg)", "cccc00010001nnnn0000vvvvvrr0mmmm") // v1 -INST(arm_TST_rsr, "TST (rsr)", "cccc00010001nnnn0000ssss0rr1mmmm") // v1 - -// Exception Generating instructions -INST(arm_BKPT, "BKPT", "cccc00010010vvvvvvvvvvvv0111vvvv") // v5 -INST(arm_SVC, "SVC", "cccc1111vvvvvvvvvvvvvvvvvvvvvvvv") // v1 -INST(arm_UDF, "UDF", "111001111111------------1111----") - -// Extension instructions -INST(arm_SXTB, "SXTB", "cccc011010101111ddddrr000111mmmm") // v6 -INST(arm_SXTB16, "SXTB16", "cccc011010001111ddddrr000111mmmm") // v6 -INST(arm_SXTH, "SXTH", "cccc011010111111ddddrr000111mmmm") // v6 -INST(arm_SXTAB, "SXTAB", "cccc01101010nnnnddddrr000111mmmm") // v6 -INST(arm_SXTAB16, "SXTAB16", "cccc01101000nnnnddddrr000111mmmm") // v6 -INST(arm_SXTAH, "SXTAH", "cccc01101011nnnnddddrr000111mmmm") // v6 -INST(arm_UXTB, "UXTB", "cccc011011101111ddddrr000111mmmm") // v6 -INST(arm_UXTB16, "UXTB16", "cccc011011001111ddddrr000111mmmm") // v6 -INST(arm_UXTH, "UXTH", "cccc011011111111ddddrr000111mmmm") // v6 -INST(arm_UXTAB, "UXTAB", "cccc01101110nnnnddddrr000111mmmm") // v6 -INST(arm_UXTAB16, "UXTAB16", "cccc01101100nnnnddddrr000111mmmm") // v6 -INST(arm_UXTAH, "UXTAH", "cccc01101111nnnnddddrr000111mmmm") // v6 - -// Hint instructions -INST(arm_PLD_imm, "PLD (imm)", "11110101uz01nnnn1111iiiiiiiiiiii") // v5E for PLD; 
v7 for PLDW -INST(arm_PLD_reg, "PLD (reg)", "11110111uz01nnnn1111iiiiitt0mmmm") // v5E for PLD; v7 for PLDW -INST(arm_SEV, "SEV", "----0011001000001111000000000100") // v6K -INST(arm_SEVL, "SEVL", "----0011001000001111000000000101") // v8 -INST(arm_WFE, "WFE", "----0011001000001111000000000010") // v6K -INST(arm_WFI, "WFI", "----0011001000001111000000000011") // v6K -INST(arm_YIELD, "YIELD", "----0011001000001111000000000001") // v6K -INST(arm_NOP, "Reserved Hint", "----0011001000001111------------") -INST(arm_NOP, "Reserved Hint", "----001100100000111100000000----") - -// Synchronization Primitive instructions -INST(arm_CLREX, "CLREX", "11110101011111111111000000011111") // v6K -INST(arm_SWP, "SWP", "cccc00010000nnnntttt00001001uuuu") // v2S (v6: Deprecated) -INST(arm_SWPB, "SWPB", "cccc00010100nnnntttt00001001uuuu") // v2S (v6: Deprecated) -INST(arm_STL, "STL", "cccc00011000nnnn111111001001tttt") // v8 -INST(arm_STLEX, "STLEX", "cccc00011000nnnndddd11101001tttt") // v8 -INST(arm_STREX, "STREX", "cccc00011000nnnndddd11111001mmmm") // v6 -INST(arm_LDA, "LDA", "cccc00011001nnnndddd110010011111") // v8 -INST(arm_LDAEX, "LDAEX", "cccc00011001nnnndddd111010011111") // v8 -INST(arm_LDREX, "LDREX", "cccc00011001nnnndddd111110011111") // v6 -INST(arm_STLEXD, "STLEXD", "cccc00011010nnnndddd11101001mmmm") // v8 -INST(arm_STREXD, "STREXD", "cccc00011010nnnndddd11111001mmmm") // v6K -INST(arm_LDAEXD, "LDAEXD", "cccc00011011nnnndddd111010011111") // v8 -INST(arm_LDREXD, "LDREXD", "cccc00011011nnnndddd111110011111") // v6K -INST(arm_STLB, "STLB", "cccc00011100nnnn111111001001tttt") // v8 -INST(arm_STLEXB, "STLEXB", "cccc00011100nnnndddd11101001mmmm") // v8 -INST(arm_STREXB, "STREXB", "cccc00011100nnnndddd11111001mmmm") // v6K -INST(arm_LDAB, "LDAB", "cccc00011101nnnndddd110010011111") // v8 -INST(arm_LDAEXB, "LDAEXB", "cccc00011101nnnndddd111010011111") // v8 -INST(arm_LDREXB, "LDREXB", "cccc00011101nnnndddd111110011111") // v6K -INST(arm_STLH, "STLH", 
"cccc00011110nnnn111111001001mmmm") // v8 -INST(arm_STLEXH, "STLEXH", "cccc00011110nnnndddd11101001mmmm") // v8 -INST(arm_STREXH, "STREXH", "cccc00011110nnnndddd11111001mmmm") // v6K -INST(arm_LDAH, "LDAH", "cccc00011111nnnndddd110010011111") // v8 -INST(arm_LDAEXH, "LDAEXH", "cccc00011111nnnndddd111010011111") // v8 -INST(arm_LDREXH, "LDREXH", "cccc00011111nnnndddd111110011111") // v6K - -// Load/Store instructions -INST(arm_LDRBT, "LDRBT (A1)", "----0100-111--------------------") // v1 -INST(arm_LDRBT, "LDRBT (A2)", "----0110-111---------------0----") // v1 -INST(arm_LDRHT, "LDRHT (A1)", "----0000-111------------1011----") // v6T2 -INST(arm_LDRHT, "LDRHT (A1)", "----0000-1111111--------1011----") // v6T2 -INST(arm_LDRHT, "LDRHT (A2)", "----0000-011--------00001011----") // v6T2 -INST(arm_LDRSBT, "LDRSBT (A1)", "----0000-111------------1101----") // v6T2 -INST(arm_LDRSBT, "LDRSBT (A2)", "----0000-011--------00001101----") // v6T2 -INST(arm_LDRSHT, "LDRSHT (A1)", "----0000-111------------1111----") // v6T2 -INST(arm_LDRSHT, "LDRSHT (A2)", "----0000-011--------00001111----") // v6T2 -INST(arm_LDRT, "LDRT (A1)", "----0100-011--------------------") // v1 -INST(arm_LDRT, "LDRT (A2)", "----0110-011---------------0----") // v1 -INST(arm_STRBT, "STRBT (A1)", "----0100-110--------------------") // v1 -INST(arm_STRBT, "STRBT (A2)", "----0110-110---------------0----") // v1 -INST(arm_STRHT, "STRHT (A1)", "----0000-110------------1011----") // v6T2 -INST(arm_STRHT, "STRHT (A2)", "----0000-010--------00001011----") // v6T2 -INST(arm_STRT, "STRT (A1)", "----0100-010--------------------") // v1 -INST(arm_STRT, "STRT (A2)", "----0110-010---------------0----") // v1 -INST(arm_LDR_lit, "LDR (lit)", "cccc0101u0011111ttttvvvvvvvvvvvv") // v1 -INST(arm_LDR_imm, "LDR (imm)", "cccc010pu0w1nnnnttttvvvvvvvvvvvv") // v1 -INST(arm_LDR_reg, "LDR (reg)", "cccc011pu0w1nnnnttttvvvvvrr0mmmm") // v1 -INST(arm_LDRB_lit, "LDRB (lit)", "cccc0101u1011111ttttvvvvvvvvvvvv") // v1 -INST(arm_LDRB_imm, 
"LDRB (imm)", "cccc010pu1w1nnnnttttvvvvvvvvvvvv") // v1 -INST(arm_LDRB_reg, "LDRB (reg)", "cccc011pu1w1nnnnttttvvvvvrr0mmmm") // v1 -INST(arm_LDRD_lit, "LDRD (lit)", "cccc0001u1001111ttttvvvv1101vvvv") // v5E -INST(arm_LDRD_imm, "LDRD (imm)", "cccc000pu1w0nnnnttttvvvv1101vvvv") // v5E -INST(arm_LDRD_reg, "LDRD (reg)", "cccc000pu0w0nnnntttt00001101mmmm") // v5E -INST(arm_LDRH_lit, "LDRH (lit)", "cccc000pu1w11111ttttvvvv1011vvvv") // v4 -INST(arm_LDRH_imm, "LDRH (imm)", "cccc000pu1w1nnnnttttvvvv1011vvvv") // v4 -INST(arm_LDRH_reg, "LDRH (reg)", "cccc000pu0w1nnnntttt00001011mmmm") // v4 -INST(arm_LDRSB_lit, "LDRSB (lit)", "cccc0001u1011111ttttvvvv1101vvvv") // v4 -INST(arm_LDRSB_imm, "LDRSB (imm)", "cccc000pu1w1nnnnttttvvvv1101vvvv") // v4 -INST(arm_LDRSB_reg, "LDRSB (reg)", "cccc000pu0w1nnnntttt00001101mmmm") // v4 -INST(arm_LDRSH_lit, "LDRSH (lit)", "cccc0001u1011111ttttvvvv1111vvvv") // v4 -INST(arm_LDRSH_imm, "LDRSH (imm)", "cccc000pu1w1nnnnttttvvvv1111vvvv") // v4 -INST(arm_LDRSH_reg, "LDRSH (reg)", "cccc000pu0w1nnnntttt00001111mmmm") // v4 -INST(arm_STR_imm, "STR (imm)", "cccc010pu0w0nnnnttttvvvvvvvvvvvv") // v1 -INST(arm_STR_reg, "STR (reg)", "cccc011pu0w0nnnnttttvvvvvrr0mmmm") // v1 -INST(arm_STRB_imm, "STRB (imm)", "cccc010pu1w0nnnnttttvvvvvvvvvvvv") // v1 -INST(arm_STRB_reg, "STRB (reg)", "cccc011pu1w0nnnnttttvvvvvrr0mmmm") // v1 -INST(arm_STRD_imm, "STRD (imm)", "cccc000pu1w0nnnnttttvvvv1111vvvv") // v5E -INST(arm_STRD_reg, "STRD (reg)", "cccc000pu0w0nnnntttt00001111mmmm") // v5E -INST(arm_STRH_imm, "STRH (imm)", "cccc000pu1w0nnnnttttvvvv1011vvvv") // v4 -INST(arm_STRH_reg, "STRH (reg)", "cccc000pu0w0nnnntttt00001011mmmm") // v4 - -// Load/Store Multiple instructions -INST(arm_LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_LDM_usr, "LDM 
(usr reg)", "----100--101--------------------") // v1 -INST(arm_LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") // v1 -INST(arm_STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_STMDB, "STMDB", "cccc100100w0nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_STMIB, "STMIB", "cccc100110w0nnnnxxxxxxxxxxxxxxxx") // v1 -INST(arm_STM_usr, "STM (usr reg)", "----100--100--------------------") // v1 - -// Miscellaneous instructions -INST(arm_BFC, "BFC", "cccc0111110vvvvvddddvvvvv0011111") // v6T2 -INST(arm_BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") // v6T2 -INST(arm_CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") // v5 -INST(arm_MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2 -INST(arm_MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2 -INST(arm_NOP, "NOP", "----0011001000001111000000000000") // v6K -INST(arm_SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2 -INST(arm_SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") // v6 -INST(arm_UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2 - -// Unsigned Sum of Absolute Differences instructions -INST(arm_USAD8, "USAD8", "cccc01111000dddd1111mmmm0001nnnn") // v6 -INST(arm_USADA8, "USADA8", "cccc01111000ddddaaaammmm0001nnnn") // v6 - -// Packing instructions -INST(arm_PKHBT, "PKHBT", "cccc01101000nnnnddddvvvvv001mmmm") // v6K -INST(arm_PKHTB, "PKHTB", "cccc01101000nnnnddddvvvvv101mmmm") // v6K - -// Reversal instructions -INST(arm_RBIT, "RBIT", "cccc011011111111dddd11110011mmmm") // v6T2 -INST(arm_REV, "REV", "cccc011010111111dddd11110011mmmm") // v6 -INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm") // v6 -INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") // v6 - -// Saturation instructions -INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6 -INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6 -INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6 
-INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6 - -// Divide instructions -INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") // v7a -INST(arm_UDIV, "UDIV", "cccc01110011dddd1111mmmm0001nnnn") // v7a - -// Multiply (Normal) instructions -INST(arm_MLA, "MLA", "cccc0000001Sddddaaaammmm1001nnnn") // v2 -INST(arm_MLS, "MLS", "cccc00000110ddddaaaammmm1001nnnn") // v6T2 -INST(arm_MUL, "MUL", "cccc0000000Sdddd0000mmmm1001nnnn") // v2 - -// Multiply (Long) instructions -INST(arm_SMLAL, "SMLAL", "cccc0000111Sddddaaaammmm1001nnnn") // v3M -INST(arm_SMULL, "SMULL", "cccc0000110Sddddaaaammmm1001nnnn") // v3M -INST(arm_UMAAL, "UMAAL", "cccc00000100ddddaaaammmm1001nnnn") // v6 -INST(arm_UMLAL, "UMLAL", "cccc0000101Sddddaaaammmm1001nnnn") // v3M -INST(arm_UMULL, "UMULL", "cccc0000100Sddddaaaammmm1001nnnn") // v3M - -// Multiply (Halfword) instructions -INST(arm_SMLALxy, "SMLALXY", "cccc00010100ddddaaaammmm1xy0nnnn") // v5xP -INST(arm_SMLAxy, "SMLAXY", "cccc00010000ddddaaaammmm1xy0nnnn") // v5xP -INST(arm_SMULxy, "SMULXY", "cccc00010110dddd0000mmmm1xy0nnnn") // v5xP - -// Multiply (Word by Halfword) instructions -INST(arm_SMLAWy, "SMLAWY", "cccc00010010ddddaaaammmm1y00nnnn") // v5xP -INST(arm_SMULWy, "SMULWY", "cccc00010010dddd0000mmmm1y10nnnn") // v5xP - -// Multiply (Most Significant Word) instructions -INST(arm_SMMUL, "SMMUL", "cccc01110101dddd1111mmmm00R1nnnn") // v6 -INST(arm_SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") // v6 -INST(arm_SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") // v6 - -// Multiply (Dual) instructions -INST(arm_SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") // v6 -INST(arm_SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") // v6 -INST(arm_SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") // v6 -INST(arm_SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") // v6 -INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") // v6 -INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6 - -// 
Parallel Add/Subtract (Modulo) instructions -INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6 -INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") // v6 -INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") // v6 -INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6 -INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6 -INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6 -INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6 -INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6 -INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6 -INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6 -INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6 -INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6 - -// Parallel Add/Subtract (Saturating) instructions -INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6 -INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6 -INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6 -INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") // v6 -INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6 -INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6 -INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6 -INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6 -INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6 -INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6 -INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6 -INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6 - -// Parallel Add/Subtract (Halving) instructions -INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6 -INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6 -INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") // 
v6 -INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") // v6 -INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6 -INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6 -INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6 -INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6 -INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") // v6 -INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") // v6 -INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6 -INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6 - -// Saturated Add/Subtract instructions -INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP -INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") // v5xP -INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP -INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP - -// Status Register Access instructions -INST(arm_CPS, "CPS", "111100010000---00000000---0-----") // v6 -INST(arm_SETEND, "SETEND", "1111000100000001000000e000000000") // v6 -INST(arm_MRS, "MRS", "cccc000100001111dddd000000000000") // v3 -INST(arm_MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") // v3 -INST(arm_MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") // v3 -INST(arm_RFE, "RFE", "1111100--0-1----0000101000000000") // v6 -INST(arm_SRS, "SRS", "1111100--1-0110100000101000-----") // v6 +INST(arm_CLREX, "CLREX", "11110101011111111111000000011111") +INST(arm_SETEND, "SETEND", "1111000100000001000000e000000000") +INST(arm_DMB, "DMB", "1111010101111111111100000101oooo") +INST(arm_DSB, "DSB", "1111010101111111111100000100oooo") +INST(arm_ISB, "ISB", "1111010101111111111100000110oooo") +INST(arm_SEV, "SEV", "----0011001000001111000000000100") +INST(arm_SEVL, "SEVL", "----0011001000001111000000000101") +INST(arm_WFE, "WFE", "----0011001000001111000000000010") +INST(arm_WFI, "WFI", 
"----0011001000001111000000000011") +INST(arm_YIELD, "YIELD", "----0011001000001111000000000001") +INST(arm_NOP, "NOP", "----0011001000001111000000000000") +INST(arm_RFE, "RFE", "1111100--0-1----0000101000000000") +INST(arm_BLX_reg, "BLX (reg)", "cccc000100101111111111110011mmmm") +INST(arm_BX, "BX", "cccc000100101111111111110001mmmm") +INST(arm_BXJ, "BXJ", "cccc000100101111111111110010mmmm") +INST(arm_NOP, "Reserved Hint", "----001100100000111100000000----") +INST(arm_MRS, "MRS", "cccc000100001111dddd000000000000") +INST(arm_SRS, "SRS", "1111100--1-0110100000101000-----") +INST(arm_CPS, "CPS", "111100010000---00000000---0-----") +INST(arm_STL, "STL", "cccc00011000nnnn111111001001tttt") +INST(arm_LDA, "LDA", "cccc00011001nnnndddd110010011111") +INST(arm_LDAEX, "LDAEX", "cccc00011001nnnndddd111010011111") +INST(arm_LDREX, "LDREX", "cccc00011001nnnndddd111110011111") +INST(arm_LDAEXD, "LDAEXD", "cccc00011011nnnndddd111010011111") +INST(arm_LDREXD, "LDREXD", "cccc00011011nnnndddd111110011111") +INST(arm_STLB, "STLB", "cccc00011100nnnn111111001001tttt") +INST(arm_LDAB, "LDAB", "cccc00011101nnnndddd110010011111") +INST(arm_LDAEXB, "LDAEXB", "cccc00011101nnnndddd111010011111") +INST(arm_LDREXB, "LDREXB", "cccc00011101nnnndddd111110011111") +INST(arm_STLH, "STLH", "cccc00011110nnnn111111001001mmmm") +INST(arm_LDAH, "LDAH", "cccc00011111nnnndddd110010011111") +INST(arm_LDAEXH, "LDAEXH", "cccc00011111nnnndddd111010011111") +INST(arm_LDREXH, "LDREXH", "cccc00011111nnnndddd111110011111") +INST(arm_CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") +INST(arm_RBIT, "RBIT", "cccc011011111111dddd11110011mmmm") +INST(arm_REV, "REV", "cccc011010111111dddd11110011mmmm") +INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm") +INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") +INST(arm_MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") +INST(arm_SXTB, "SXTB", "cccc011010101111ddddrr000111mmmm") +INST(arm_SXTB16, "SXTB16", "cccc011010001111ddddrr000111mmmm") 
+INST(arm_SXTH, "SXTH", "cccc011010111111ddddrr000111mmmm") +INST(arm_UXTB, "UXTB", "cccc011011101111ddddrr000111mmmm") +INST(arm_UXTB16, "UXTB16", "cccc011011001111ddddrr000111mmmm") +INST(arm_UXTH, "UXTH", "cccc011011111111ddddrr000111mmmm") +INST(arm_UDF, "UDF", "111001111111------------1111----") +INST(arm_NOP, "Reserved Hint", "----0011001000001111------------") +INST(arm_SWP, "SWP", "cccc00010000nnnntttt00001001uuuu") +INST(arm_SWPB, "SWPB", "cccc00010100nnnntttt00001001uuuu") +INST(arm_STLEX, "STLEX", "cccc00011000nnnndddd11101001tttt") +INST(arm_STREX, "STREX", "cccc00011000nnnndddd11111001mmmm") +INST(arm_STLEXD, "STLEXD", "cccc00011010nnnndddd11101001mmmm") +INST(arm_STREXD, "STREXD", "cccc00011010nnnndddd11111001mmmm") +INST(arm_STLEXB, "STLEXB", "cccc00011100nnnndddd11101001mmmm") +INST(arm_STREXB, "STREXB", "cccc00011100nnnndddd11111001mmmm") +INST(arm_STLEXH, "STLEXH", "cccc00011110nnnndddd11101001mmmm") +INST(arm_STREXH, "STREXH", "cccc00011110nnnndddd11111001mmmm") +INST(arm_SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") +INST(arm_USAD8, "USAD8", "cccc01111000dddd1111mmmm0001nnnn") +INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") +INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") +INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") +INST(arm_UDIV, "UDIV", "cccc01110011dddd1111mmmm0001nnnn") +INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") +INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") +INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") +INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") +INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") +INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") +INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") +INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") +INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") +INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") +INST(arm_USUB8, 
"USUB8", "cccc01100101nnnndddd11111111mmmm") +INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") +INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") +INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") +INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") +INST(arm_QSAX, "QSAX", "cccc01100010nnnndddd11110101mmmm") +INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") +INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") +INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") +INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") +INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") +INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") +INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") +INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") +INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") +INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") +INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") +INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") +INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") +INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") +INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") +INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") +INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") +INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") +INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") +INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") +INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") +INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") +INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") +INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") +INST(arm_PLD_reg, "PLD (reg)", "11110111uz01nnnn1111iiiiitt0mmmm") +INST(arm_LDRHT, "LDRHT (A1)", 
"----0000-1111111--------1011----") +INST(arm_LDRHT, "LDRHT (A2)", "----0000-011--------00001011----") +INST(arm_LDRSBT, "LDRSBT (A2)", "----0000-011--------00001101----") +INST(arm_LDRSHT, "LDRSHT (A2)", "----0000-011--------00001111----") +INST(arm_STRHT, "STRHT (A2)", "----0000-010--------00001011----") +INST(arm_LDRD_lit, "LDRD (lit)", "cccc0001u1001111ttttvvvv1101vvvv") +INST(arm_LDRSB_lit, "LDRSB (lit)", "cccc0001u1011111ttttvvvv1101vvvv") +INST(arm_LDRSH_lit, "LDRSH (lit)", "cccc0001u1011111ttttvvvv1111vvvv") +INST(arm_MUL, "MUL", "cccc0000000Sdddd0000mmmm1001nnnn") +INST(arm_SMULWy, "SMULWY", "cccc00010010dddd0000mmmm1y10nnnn") +INST(arm_SMMUL, "SMMUL", "cccc01110101dddd1111mmmm00R1nnnn") +INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") +INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") +INST(arm_CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") +INST(arm_CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") +INST(arm_CMN_rsr, "CMN (rsr)", "cccc00010111nnnn0000ssss0rr1mmmm") +INST(arm_CMP_rsr, "CMP (rsr)", "cccc00010101nnnn0000ssss0rr1mmmm") +INST(arm_TEQ_rsr, "TEQ (rsr)", "cccc00010011nnnn0000ssss0rr1mmmm") +INST(arm_TST_rsr, "TST (rsr)", "cccc00010001nnnn0000ssss0rr1mmmm") +INST(arm_SXTAB, "SXTAB", "cccc01101010nnnnddddrr000111mmmm") +INST(arm_SXTAB16, "SXTAB16", "cccc01101000nnnnddddrr000111mmmm") +INST(arm_SXTAH, "SXTAH", "cccc01101011nnnnddddrr000111mmmm") +INST(arm_UXTAB, "UXTAB", "cccc01101110nnnnddddrr000111mmmm") +INST(arm_UXTAB16, "UXTAB16", "cccc01101100nnnnddddrr000111mmmm") +INST(arm_UXTAH, "UXTAH", "cccc01101111nnnnddddrr000111mmmm") +INST(arm_PLD_imm, "PLD (imm)", "11110101uz01nnnn1111iiiiiiiiiiii") +INST(arm_BFC, "BFC", "cccc0111110vvvvvddddvvvvv0011111") +INST(arm_SMULxy, "SMULXY", "cccc00010110dddd0000mmmm1xy0nnnn") +INST(arm_CMN_reg, "CMN (reg)", "cccc00010111nnnn0000vvvvvrr0mmmm") +INST(arm_CMP_reg, "CMP (reg)", "cccc00010101nnnn0000vvvvvrr0mmmm") +INST(arm_MOV_rsr, "MOV (rsr)", 
"cccc0001101S0000ddddssss0rr1mmmm") +INST(arm_MVN_rsr, "MVN (rsr)", "cccc0001111S0000ddddssss0rr1mmmm") +INST(arm_TEQ_reg, "TEQ (reg)", "cccc00010011nnnn0000vvvvvrr0mmmm") +INST(arm_TST_reg, "TST (reg)", "cccc00010001nnnn0000vvvvvrr0mmmm") +INST(arm_LDRD_reg, "LDRD (reg)", "cccc000pu0w0nnnntttt00001101mmmm") +INST(arm_LDRH_lit, "LDRH (lit)", "cccc000pu1w11111ttttvvvv1011vvvv") +INST(arm_LDRH_reg, "LDRH (reg)", "cccc000pu0w1nnnntttt00001011mmmm") +INST(arm_LDRSB_reg, "LDRSB (reg)", "cccc000pu0w1nnnntttt00001101mmmm") +INST(arm_LDRSH_reg, "LDRSH (reg)", "cccc000pu0w1nnnntttt00001111mmmm") +INST(arm_STRD_reg, "STRD (reg)", "cccc000pu0w0nnnntttt00001111mmmm") +INST(arm_STRH_reg, "STRH (reg)", "cccc000pu0w0nnnntttt00001011mmmm") +INST(arm_CMN_imm, "CMN (imm)", "cccc00110111nnnn0000rrrrvvvvvvvv") +INST(arm_CMP_imm, "CMP (imm)", "cccc00110101nnnn0000rrrrvvvvvvvv") +INST(arm_MOV_reg, "MOV (reg)", "cccc0001101S0000ddddvvvvvrr0mmmm") +INST(arm_MVN_reg, "MVN (reg)", "cccc0001111S0000ddddvvvvvrr0mmmm") +INST(arm_TEQ_imm, "TEQ (imm)", "cccc00110011nnnn0000rrrrvvvvvvvv") +INST(arm_TST_imm, "TST (imm)", "cccc00110001nnnn0000rrrrvvvvvvvv") +INST(arm_BKPT, "BKPT", "cccc00010010vvvvvvvvvvvv0111vvvv") +INST(arm_USADA8, "USADA8", "cccc01111000ddddaaaammmm0001nnnn") +INST(arm_MLS, "MLS", "cccc00000110ddddaaaammmm1001nnnn") +INST(arm_UMAAL, "UMAAL", "cccc00000100ddddaaaammmm1001nnnn") +INST(arm_MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") +INST(arm_MOV_imm, "MOV (imm)", "cccc0011101S0000ddddrrrrvvvvvvvv") +INST(arm_MVN_imm, "MVN (imm)", "cccc0011111S0000ddddrrrrvvvvvvvv") +INST(arm_LDRHT, "LDRHT (A1)", "----0000-111------------1011----") +INST(arm_LDRSBT, "LDRSBT (A1)", "----0000-111------------1101----") +INST(arm_LDRSHT, "LDRSHT (A1)", "----0000-111------------1111----") +INST(arm_STRHT, "STRHT (A1)", "----0000-110------------1011----") +INST(arm_LDR_lit, "LDR (lit)", "cccc0101u0011111ttttvvvvvvvvvvvv") +INST(arm_LDRB_lit, "LDRB (lit)", "cccc0101u1011111ttttvvvvvvvvvvvv") 
+INST(arm_PKHBT, "PKHBT", "cccc01101000nnnnddddvvvvv001mmmm") +INST(arm_PKHTB, "PKHTB", "cccc01101000nnnnddddvvvvv101mmmm") +INST(arm_MLA, "MLA", "cccc0000001Sddddaaaammmm1001nnnn") +INST(arm_SMLAL, "SMLAL", "cccc0000111Sddddaaaammmm1001nnnn") +INST(arm_SMULL, "SMULL", "cccc0000110Sddddaaaammmm1001nnnn") +INST(arm_UMLAL, "UMLAL", "cccc0000101Sddddaaaammmm1001nnnn") +INST(arm_UMULL, "UMULL", "cccc0000100Sddddaaaammmm1001nnnn") +INST(arm_SMLAWy, "SMLAWY", "cccc00010010ddddaaaammmm1y00nnnn") +INST(arm_SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") +INST(arm_SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") +INST(arm_SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") +INST(arm_SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") +INST(arm_SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") +INST(arm_SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") +INST(arm_BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") +INST(arm_SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") +INST(arm_UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") +INST(arm_SMLALxy, "SMLALXY", "cccc00010100ddddaaaammmm1xy0nnnn") +INST(arm_SMLAxy, "SMLAXY", "cccc00010000ddddaaaammmm1xy0nnnn") +INST(arm_ADC_rsr, "ADC (rsr)", "cccc0000101Snnnnddddssss0rr1mmmm") +INST(arm_ADD_rsr, "ADD (rsr)", "cccc0000100Snnnnddddssss0rr1mmmm") +INST(arm_AND_rsr, "AND (rsr)", "cccc0000000Snnnnddddssss0rr1mmmm") +INST(arm_BIC_rsr, "BIC (rsr)", "cccc0001110Snnnnddddssss0rr1mmmm") +INST(arm_EOR_rsr, "EOR (rsr)", "cccc0000001Snnnnddddssss0rr1mmmm") +INST(arm_ORR_rsr, "ORR (rsr)", "cccc0001100Snnnnddddssss0rr1mmmm") +INST(arm_RSB_rsr, "RSB (rsr)", "cccc0000011Snnnnddddssss0rr1mmmm") +INST(arm_RSC_rsr, "RSC (rsr)", "cccc0000111Snnnnddddssss0rr1mmmm") +INST(arm_SBC_rsr, "SBC (rsr)", "cccc0000110Snnnnddddssss0rr1mmmm") +INST(arm_SUB_rsr, "SUB (rsr)", "cccc0000010Snnnnddddssss0rr1mmmm") +INST(arm_LDRD_imm, "LDRD (imm)", "cccc000pu1w0nnnnttttvvvv1101vvvv") +INST(arm_LDRH_imm, "LDRH (imm)", "cccc000pu1w1nnnnttttvvvv1011vvvv") 
+INST(arm_LDRSB_imm, "LDRSB (imm)", "cccc000pu1w1nnnnttttvvvv1101vvvv") +INST(arm_LDRSH_imm, "LDRSH (imm)", "cccc000pu1w1nnnnttttvvvv1111vvvv") +INST(arm_STRD_imm, "STRD (imm)", "cccc000pu1w0nnnnttttvvvv1111vvvv") +INST(arm_STRH_imm, "STRH (imm)", "cccc000pu1w0nnnnttttvvvv1011vvvv") +INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") +INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") +INST(arm_MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") +INST(arm_MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") +INST(arm_ADC_reg, "ADC (reg)", "cccc0000101Snnnnddddvvvvvrr0mmmm") +INST(arm_ADD_reg, "ADD (reg)", "cccc0000100Snnnnddddvvvvvrr0mmmm") +INST(arm_AND_reg, "AND (reg)", "cccc0000000Snnnnddddvvvvvrr0mmmm") +INST(arm_BIC_reg, "BIC (reg)", "cccc0001110Snnnnddddvvvvvrr0mmmm") +INST(arm_EOR_reg, "EOR (reg)", "cccc0000001Snnnnddddvvvvvrr0mmmm") +INST(arm_ORR_reg, "ORR (reg)", "cccc0001100Snnnnddddvvvvvrr0mmmm") +INST(arm_RSB_reg, "RSB (reg)", "cccc0000011Snnnnddddvvvvvrr0mmmm") +INST(arm_RSC_reg, "RSC (reg)", "cccc0000111Snnnnddddvvvvvrr0mmmm") +INST(arm_SBC_reg, "SBC (reg)", "cccc0000110Snnnnddddvvvvvrr0mmmm") +INST(arm_SUB_reg, "SUB (reg)", "cccc0000010Snnnnddddvvvvvrr0mmmm") +INST(arm_LDRBT, "LDRBT (A2)", "----0110-111---------------0----") +INST(arm_LDRT, "LDRT (A2)", "----0110-011---------------0----") +INST(arm_STRBT, "STRBT (A2)", "----0110-110---------------0----") +INST(arm_STRT, "STRT (A2)", "----0110-010---------------0----") +INST(arm_MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") +INST(arm_MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") +INST(arm_BLX_imm, "BLX (imm)", "1111101hvvvvvvvvvvvvvvvvvvvvvvvv") +INST(arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv") +INST(arm_ADD_imm, "ADD (imm)", "cccc0010100Snnnnddddrrrrvvvvvvvv") +INST(arm_AND_imm, "AND (imm)", "cccc0010000Snnnnddddrrrrvvvvvvvv") +INST(arm_BIC_imm, "BIC (imm)", "cccc0011110Snnnnddddrrrrvvvvvvvv") +INST(arm_EOR_imm, "EOR (imm)", "cccc0010001Snnnnddddrrrrvvvvvvvv") 
+INST(arm_ORR_imm, "ORR (imm)", "cccc0011100Snnnnddddrrrrvvvvvvvv") +INST(arm_RSB_imm, "RSB (imm)", "cccc0010011Snnnnddddrrrrvvvvvvvv") +INST(arm_RSC_imm, "RSC (imm)", "cccc0010111Snnnnddddrrrrvvvvvvvv") +INST(arm_SBC_imm, "SBC (imm)", "cccc0010110Snnnnddddrrrrvvvvvvvv") +INST(arm_SUB_imm, "SUB (imm)", "cccc0010010Snnnnddddrrrrvvvvvvvv") +INST(arm_LDRBT, "LDRBT (A1)", "----0100-111--------------------") +INST(arm_LDRT, "LDRT (A1)", "----0100-011--------------------") +INST(arm_STRBT, "STRBT (A1)", "----0100-110--------------------") +INST(arm_STRT, "STRT (A1)", "----0100-010--------------------") +INST(arm_LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") +INST(arm_LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") +INST(arm_LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") +INST(arm_LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") +INST(arm_STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") +INST(arm_STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") +INST(arm_STMDB, "STMDB", "cccc100100w0nnnnxxxxxxxxxxxxxxxx") +INST(arm_STMIB, "STMIB", "cccc100110w0nnnnxxxxxxxxxxxxxxxx") +INST(arm_MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") +INST(arm_MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") +INST(arm_LDR_reg, "LDR (reg)", "cccc011pu0w1nnnnttttvvvvvrr0mmmm") +INST(arm_LDRB_reg, "LDRB (reg)", "cccc011pu1w1nnnnttttvvvvvrr0mmmm") +INST(arm_STR_reg, "STR (reg)", "cccc011pu0w0nnnnttttvvvvvrr0mmmm") +INST(arm_STRB_reg, "STRB (reg)", "cccc011pu1w0nnnnttttvvvvvrr0mmmm") +INST(arm_LDM_usr, "LDM (usr reg)", "----100--101--------------------") +INST(arm_LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") +INST(arm_STM_usr, "STM (usr reg)", "----100--100--------------------") +INST(arm_CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") +INST(arm_LDR_imm, "LDR (imm)", "cccc010pu0w1nnnnttttvvvvvvvvvvvv") +INST(arm_LDRB_imm, "LDRB (imm)", "cccc010pu1w1nnnnttttvvvvvvvvvvvv") +INST(arm_STR_imm, "STR (imm)", "cccc010pu0w0nnnnttttvvvvvvvvvvvv") +INST(arm_STRB_imm, "STRB (imm)", 
"cccc010pu1w0nnnnttttvvvvvvvvvvvv") +INST(arm_B, "B", "cccc1010vvvvvvvvvvvvvvvvvvvvvvvv") +INST(arm_BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") +INST(arm_LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") +INST(arm_STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") +INST(arm_SVC, "SVC", "cccc1111vvvvvvvvvvvvvvvvvvvvvvvv") \ No newline at end of file diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index a16caedd87..2861b998ca 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,50 +27,12 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() noexcept { - std::vector>> table = { -#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +static std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = std::array{ +#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./asimd.inc" #undef INST }; - // Exceptions to the rule of thumb. - const std::set comes_first{ - "VBIC, VMOV, VMVN, VORR (immediate)", - "VEXT", - "VTBL", - "VTBX", - "VDUP (scalar)", - }; - const std::set comes_last{ - "VMLA (scalar)", - "VMLAL (scalar)", - "VQDMLAL/VQDMLSL (scalar)", - "VMUL (scalar)", - "VMULL (scalar)", - "VQDMULL (scalar)", - "VQDMULH (scalar)", - "VQRDMULH (scalar)", - }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { - return comes_first.count(e.first) > 0; - }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { - return comes_last.count(e.first) == 0; - }); - // If a matcher has more bits in its mask it is more specific, so it should come first. 
- std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { - return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); - }); - std::vector> final_table; - std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { - return e.second; - }); - return final_table; -} - -template -std::optional>> DecodeASIMD(u32 instruction) noexcept { - alignas(64) static const auto table = GetASIMDDecodeTable(); auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { return matcher.Matches(instruction); }); @@ -78,7 +40,7 @@ std::optional>> DecodeASIMD(u32 ins } template -std::optional GetNameASIMD(u32 inst) noexcept { +static std::optional GetNameASIMD(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.inc b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.inc index d0dfd86752..def1a17866 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.inc +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.inc @@ -1,172 +1,151 @@ -// Three registers of the same length -INST(asimd_VHADD, "VHADD", "1111001U0Dzznnnndddd0000NQM0mmmm") // ASIMD -INST(asimd_VQADD, "VQADD", "1111001U0Dzznnnndddd0000NQM1mmmm") // ASIMD -INST(asimd_VRHADD, "VRHADD", "1111001U0Dzznnnndddd0001NQM0mmmm") // ASIMD -INST(asimd_VAND_reg, "VAND (register)", "111100100D00nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VBIC_reg, "VBIC (register)", "111100100D01nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VORR_reg, "VORR (register)", "111100100D10nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VORN_reg, "VORN (register)", "111100100D11nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VEOR_reg, "VEOR (register)", "111100110D00nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VBSL, 
"VBSL", "111100110D01nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VBIT, "VBIT", "111100110D10nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VBIF, "VBIF", "111100110D11nnnndddd0001NQM1mmmm") // ASIMD -INST(asimd_VHSUB, "VHSUB", "1111001U0Dzznnnndddd0010NQM0mmmm") // ASIMD -INST(asimd_VQSUB, "VQSUB", "1111001U0Dzznnnndddd0010NQM1mmmm") // ASIMD -INST(asimd_VCGT_reg, "VCGT (register)", "1111001U0Dzznnnndddd0011NQM0mmmm") // ASIMD -INST(asimd_VCGE_reg, "VCGE (register)", "1111001U0Dzznnnndddd0011NQM1mmmm") // ASIMD -INST(asimd_VSHL_reg, "VSHL (register)", "1111001U0Dzznnnndddd0100NQM0mmmm") // ASIMD -INST(asimd_VQSHL_reg, "VQSHL (register)", "1111001U0Dzznnnndddd0100NQM1mmmm") // ASIMD -INST(asimd_VRSHL, "VRSHL", "1111001U0Dzznnnndddd0101NQM0mmmm") // ASIMD -//INST(asimd_VQRSHL, "VQRSHL", "1111001U0-CC--------0101---1----") // ASIMD -INST(asimd_VMAX, "VMAX/VMIN (integer)", "1111001U0Dzznnnnmmmm0110NQMommmm") // ASIMD -INST(asimd_VABD, "VABD", "1111001U0Dzznnnndddd0111NQM0mmmm") // ASIMD -INST(asimd_VABA, "VABA", "1111001U0Dzznnnndddd0111NQM1mmmm") // ASIMD -INST(asimd_VADD_int, "VADD (integer)", "111100100Dzznnnndddd1000NQM0mmmm") // ASIMD -INST(asimd_VSUB_int, "VSUB (integer)", "111100110Dzznnnndddd1000NQM0mmmm") // ASIMD -INST(asimd_VTST, "VTST", "111100100Dzznnnndddd1000NQM1mmmm") // ASIMD -INST(asimd_VCEQ_reg, "VCEG (register)", "111100110Dzznnnndddd1000NQM1mmmm") // ASIMD -INST(asimd_VMLA, "VMLA/VMLS", "1111001o0Dzznnnndddd1001NQM0mmmm") // ASIMD -INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd1001NQM1mmmm") // ASIMD -INST(asimd_VPMAX_int, "VPMAX/VPMIN (integer)", "1111001U0Dzznnnndddd1010NQMommmm") // ASIMD -INST(v8_VMAXNM, "VMAXNM", "111100110D0znnnndddd1111NQM1mmmm") // v8 -INST(v8_VMINNM, "VMINNM", "111100110D1znnnndddd1111NQM1mmmm") // v8 -INST(asimd_VQDMULH, "VQDMULH", "111100100Dzznnnndddd1011NQM0mmmm") // ASIMD -INST(asimd_VQRDMULH, "VQRDMULH", "111100110Dzznnnndddd1011NQM0mmmm") // ASIMD -INST(asimd_VPADD, "VPADD", "111100100Dzznnnndddd1011NQM1mmmm") // ASIMD 
-INST(asimd_VFMA, "VFMA", "111100100D0znnnndddd1100NQM1mmmm") // ASIMD -INST(asimd_VFMS, "VFMS", "111100100D1znnnndddd1100NQM1mmmm") // ASIMD -INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD -INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD -INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") // ASIMD -INST(asimd_VABD_float, "VABD (floating-point)", "111100110D1znnnndddd1101NQM0mmmm") // ASIMD -INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100D0znnnndddd1101NQM1mmmm") // ASIMD -INST(asimd_VMLS_float, "VMLS (floating-point)", "111100100D1znnnndddd1101NQM1mmmm") // ASIMD -INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD -INST(asimd_VCEQ_reg_float, "VCEQ (register)", "111100100D0znnnndddd1110NQM0mmmm") // ASIMD -INST(asimd_VCGE_reg_float, "VCGE (register)", "111100110D0znnnndddd1110NQM0mmmm") // ASIMD -INST(asimd_VCGT_reg_float, "VCGT (register)", "111100110D1znnnndddd1110NQM0mmmm") // ASIMD -INST(asimd_VACGE, "VACGE", "111100110Doznnnndddd1110NQM1mmmm") // ASIMD -INST(asimd_VMAX_float, "VMAX (floating-point)", "111100100D0znnnndddd1111NQM0mmmm") // ASIMD -INST(asimd_VMIN_float, "VMIN (floating-point)", "111100100D1znnnndddd1111NQM0mmmm") // ASIMD -INST(asimd_VPMAX_float, "VPMAX (floating-point)", "111100110D0znnnndddd1111NQM0mmmm") // ASIMD -INST(asimd_VPMIN_float, "VPMIN (floating-point)", "111100110D1znnnndddd1111NQM0mmmm") // ASIMD -INST(asimd_VRECPS, "VRECPS", "111100100D0znnnndddd1111NQM1mmmm") // ASIMD -INST(asimd_VRSQRTS, "VRSQRTS", "111100100D1znnnndddd1111NQM1mmmm") // ASIMD -INST(v8_SHA256H, "SHA256H", "111100110D00nnnndddd1100NQM0mmmm") // v8 -INST(v8_SHA256H2, "SHA256H2", "111100110D01nnnndddd1100NQM0mmmm") // v8 -INST(v8_SHA256SU1, "SHA256SU1", "111100110D10nnnndddd1100NQM0mmmm") // v8 +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: 
GPL-3.0-or-later +// DO NOT REORDER -// Three registers of different lengths -INST(asimd_VADDL, "VADDL/VADDW", "1111001U1Dzznnnndddd000oN0M0mmmm") // ASIMD -INST(asimd_VSUBL, "VSUBL/VSUBW", "1111001U1Dzznnnndddd001oN0M0mmmm") // ASIMD -//INST(asimd_VADDHN, "VADDHN", "111100101-----------0100-0-0----") // ASIMD -//INST(asimd_VRADDHN, "VRADDHN", "111100111-----------0100-0-0----") // ASIMD -INST(asimd_VABAL, "VABAL", "1111001U1Dzznnnndddd0101N0M0mmmm") // ASIMD -//INST(asimd_VSUBHN, "VSUBHN", "111100101-----------0110-0-0----") // ASIMD -//INST(asimd_VRSUBHN, "VRSUBHN", "111100111-----------0110-0-0----") // ASIMD -INST(asimd_VABDL, "VABDL", "1111001U1Dzznnnndddd0111N0M0mmmm") // ASIMD -INST(asimd_VMLAL, "VMLAL/VMLSL", "1111001U1Dzznnnndddd10o0N0M0mmmm") // ASIMD -//INST(asimd_VQDMLAL, "VQDMLAL", "111100101-----------10-1-0-0----") // ASIMD -INST(asimd_VMULL, "VMULL", "1111001U1Dzznnnndddd11P0N0M0mmmm") // ASIMD -//INST(asimd_VQDMULL, "VQDMULL", "111100101-----------1101-0-0----") // ASIMD - -// Two registers and a scalar -INST(asimd_VMLA_scalar, "VMLA (scalar)", "1111001Q1Dzznnnndddd0o0FN1M0mmmm") // ASIMD -INST(asimd_VMLAL_scalar, "VMLAL (scalar)", "1111001U1dzznnnndddd0o10N1M0mmmm") // ASIMD -//INST(asimd_VQDMLAL_scalar, "VQDMLAL/VQDMLSL (scalar)", "111100101-BB--------0x11-1-0----") // ASIMD -INST(asimd_VMUL_scalar, "VMUL (scalar)", "1111001Q1Dzznnnndddd100FN1M0mmmm") // ASIMD -INST(asimd_VMULL_scalar, "VMULL (scalar)", "1111001U1Dzznnnndddd1010N1M0mmmm") // ASIMD -INST(asimd_VQDMULL_scalar, "VQDMULL (scalar)", "111100101Dzznnnndddd1011N1M0mmmm") // ASIMD -INST(asimd_VQDMULH_scalar, "VQDMULH (scalar)", "1111001Q1Dzznnnndddd1100N1M0mmmm") // ASIMD -INST(asimd_VQRDMULH_scalar, "VQRDMULH (scalar)", "1111001Q1Dzznnnndddd1101N1M0mmmm") // ASIMD - -// Two registers and a shift amount -INST(asimd_SHR, "SHR", "1111001U1Diiiiiidddd0000LQM1mmmm") // ASIMD -INST(asimd_SRA, "SRA", "1111001U1Diiiiiidddd0001LQM1mmmm") // ASIMD -INST(asimd_VRSHR, "VRSHR", 
"1111001U1Diiiiiidddd0010LQM1mmmm") // ASIMD -INST(asimd_VRSRA, "VRSRA", "1111001U1Diiiiiidddd0011LQM1mmmm") // ASIMD -INST(asimd_VSRI, "VSRI", "111100111Diiiiiidddd0100LQM1mmmm") // ASIMD -INST(asimd_VSHL, "VSHL", "111100101Diiiiiidddd0101LQM1mmmm") // ASIMD -INST(asimd_VSLI, "VSLI", "111100111Diiiiiidddd0101LQM1mmmm") // ASIMD -INST(asimd_VQSHL, "VQSHL" , "1111001U1Diiiiiidddd011oLQM1mmmm") // ASIMD -INST(asimd_VSHRN, "VSHRN", "111100101Diiiiiidddd100000M1mmmm") // ASIMD -INST(asimd_VRSHRN, "VRSHRN", "111100101Diiiiiidddd100001M1mmmm") // ASIMD -INST(asimd_VQSHRUN, "VQSHRUN", "111100111Diiiiiidddd100000M1mmmm") // ASIMD -INST(asimd_VQRSHRUN, "VQRSHRUN", "111100111Diiiiiidddd100001M1mmmm") // ASIMD -INST(asimd_VQSHRN, "VQSHRN", "1111001U1Diiiiiidddd100100M1mmmm") // ASIMD -INST(asimd_VQRSHRN, "VQRSHRN", "1111001U1Diiiiiidddd100101M1mmmm") // ASIMD -INST(asimd_VSHLL, "VSHLL", "1111001U1Diiiiiidddd101000M1mmmm") // ASIMD -INST(asimd_VCVT_fixed, "VCVT (fixed-point)", "1111001U1Diiiiiidddd111o0QM1mmmm") // ASIMD - -// Two registers, miscellaneous -INST(asimd_VREV, "VREV{16,32,64}", "111100111D11zz00dddd000ooQM0mmmm") // ASIMD -INST(asimd_VPADDL, "VPADDL", "111100111D11zz00dddd0010oQM0mmmm") // ASIMD -INST(asimd_VCLS, "VCLS", "111100111D11zz00dddd01000QM0mmmm") // ASIMD -INST(asimd_VCLZ, "VCLZ", "111100111D11zz00dddd01001QM0mmmm") // ASIMD -INST(asimd_VCNT, "VCNT", "111100111D11zz00dddd01010QM0mmmm") // ASIMD -INST(asimd_VMVN_reg, "VMVN_reg", "111100111D11zz00dddd01011QM0mmmm") // ASIMD -INST(asimd_VPADAL, "VPADAL", "111100111D11zz00dddd0110oQM0mmmm") // ASIMD -INST(asimd_VQABS, "VQABS", "111100111D11zz00dddd01110QM0mmmm") // ASIMD -INST(asimd_VQNEG, "VQNEG", "111100111D11zz00dddd01111QM0mmmm") // ASIMD -INST(asimd_VCGT_zero, "VCGT (zero)", "111100111D11zz01dddd0F000QM0mmmm") // ASIMD -INST(asimd_VCGE_zero, "VCGE (zero)", "111100111D11zz01dddd0F001QM0mmmm") // ASIMD -INST(asimd_VCEQ_zero, "VCEQ (zero)", "111100111D11zz01dddd0F010QM0mmmm") // ASIMD 
-INST(asimd_VCLE_zero, "VCLE (zero)", "111100111D11zz01dddd0F011QM0mmmm") // ASIMD -INST(asimd_VCLT_zero, "VCLT (zero)", "111100111D11zz01dddd0F100QM0mmmm") // ASIMD -INST(arm_UDF, "UNALLOCATED", "111100111-11--01----01101--0----") // v8 -INST(asimd_VABS, "VABS", "111100111D11zz01dddd0F110QM0mmmm") // ASIMD -INST(asimd_VNEG, "VNEG", "111100111D11zz01dddd0F111QM0mmmm") // ASIMD -INST(asimd_VSWP, "VSWP", "111100111D110010dddd00000QM0mmmm") // ASIMD -INST(arm_UDF, "UNALLOCATED", "111100111-11--10----00000--0----") // ASIMD -INST(asimd_VTRN, "VTRN", "111100111D11zz10dddd00001QM0mmmm") // ASIMD -INST(asimd_VUZP, "VUZP", "111100111D11zz10dddd00010QM0mmmm") // ASIMD -INST(asimd_VZIP, "VZIP", "111100111D11zz10dddd00011QM0mmmm") // ASIMD -INST(asimd_VMOVN, "VMOVN", "111100111D11zz10dddd001000M0mmmm") // ASIMD -INST(asimd_VQMOVUN, "VQMOVUN", "111100111D11zz10dddd001001M0mmmm") // ASIMD -INST(asimd_VQMOVN, "VQMOVN", "111100111D11zz10dddd00101oM0mmmm") // ASIMD -INST(asimd_VSHLL_max, "VSHLL_max", "111100111D11zz10dddd001100M0mmmm") // ASIMD -INST(v8_VRINTN, "VRINTN", "111100111D11zz10dddd01000QM0mmmm") // v8 -INST(v8_VRINTX, "VRINTX", "111100111D11zz10dddd01001QM0mmmm") // v8 -INST(v8_VRINTA, "VRINTA", "111100111D11zz10dddd01010QM0mmmm") // v8 -INST(v8_VRINTZ, "VRINTZ", "111100111D11zz10dddd01011QM0mmmm") // v8 -INST(v8_VRINTM, "VRINTM", "111100111D11zz10dddd01101QM0mmmm") // v8 -INST(v8_VRINTP, "VRINTP", "111100111D11zz10dddd01111QM0mmmm") // v8 -INST(asimd_VCVT_half, "VCVT (half-precision)", "111100111D11zz10dddd011o00M0mmmm") // ASIMD -INST(arm_UDF, "UNALLOCATED", "111100111-11--10----011-01-0----") // ASIMD -INST(v8_VCVTA, "VCVTA", "111100111D11zz11dddd0000oQM0mmmm") // v8 -INST(v8_VCVTN, "VCVTN", "111100111D11zz11dddd0001oQM0mmmm") // v8 -INST(v8_VCVTP, "VCVTP", "111100111D11zz11dddd0010oQM0mmmm") // v8 -INST(v8_VCVTM, "VCVTM", "111100111D11zz11dddd0011oQM0mmmm") // v8 -INST(asimd_VRECPE, "VRECPE", "111100111D11zz11dddd010F0QM0mmmm") // ASIMD -INST(asimd_VRSQRTE, 
"VRSQRTE", "111100111D11zz11dddd010F1QM0mmmm") // ASIMD -INST(asimd_VCVT_integer, "VCVT (integer)", "111100111D11zz11dddd011oUQM0mmmm") // ASIMD - -// Two registers, cryptography -INST(v8_AESE, "AESE", "111100111D11zz00dddd001100M0mmmm") // v8 -INST(v8_AESD, "AESD", "111100111D11zz00dddd001101M0mmmm") // v8 -INST(v8_AESMC, "AESMC", "111100111D11zz00dddd001110M0mmmm") // v8 -INST(v8_AESIMC, "AESIMC", "111100111D11zz00dddd001111M0mmmm") // v8 -INST(arm_UDF, "UNALLOCATED", "111100111-11--01----001010-0----") // v8 -INST(arm_UDF, "UNALLOCATED (SHA1H)", "111100111-11--01----001011-0----") // v8 -INST(arm_UDF, "UNALLOCATED (SHA1SU1)", "111100111-11--10----001110-0----") // v8 -INST(v8_SHA256SU0, "SHA256SU0", "111100111D11zz10dddd001111M0mmmm") // v8 - -// One register and modified immediate -INST(asimd_VMOV_imm, "VBIC, VMOV, VMVN, VORR (immediate)", "1111001a1D000bcdVVVVmmmm0Qo1efgh") // ASIMD - -// Miscellaneous -INST(asimd_VEXT, "VEXT", "111100101D11nnnnddddiiiiNQM0mmmm") // ASIMD -INST(asimd_VTBL, "VTBL", "111100111D11nnnndddd10zzN0M0mmmm") // ASIMD -INST(asimd_VTBX, "VTBX", "111100111D11nnnndddd10zzN1M0mmmm") // ASIMD -INST(asimd_VDUP_scalar, "VDUP (scalar)", "111100111D11iiiidddd11000QM0mmmm") // ASIMD -INST(arm_UDF, "UNALLOCATED", "111100111-11--------11-----0----") // ASIMD - -// Advanced SIMD load/store structures -INST(v8_VST_multiple, "VST{1-4} (multiple)", "111101000D00nnnnddddxxxxzzaammmm") // v8 -INST(v8_VLD_multiple, "VLD{1-4} (multiple)", "111101000D10nnnnddddxxxxzzaammmm") // v8 -INST(arm_UDF, "UNALLOCATED", "111101000--0--------1011--------") // v8 -INST(arm_UDF, "UNALLOCATED", "111101000--0--------11----------") // v8 -INST(arm_UDF, "UNALLOCATED", "111101001-00--------11----------") // v8 -INST(v8_VLD_all_lanes, "VLD{1-4} (all lanes)", "111101001D10nnnndddd11nnzzTammmm") // v8 -INST(v8_VST_single, "VST{1-4} (single)", "111101001D00nnnnddddzzNNaaaammmm") // v8 -INST(v8_VLD_single, "VLD{1-4} (single)", "111101001D10nnnnddddzzNNaaaammmm") // v8 
+INST(asimd_VMOV_imm, "VBIC, VMOV, VMVN, VORR (immediate)", "1111001a1D000bcdVVVVmmmm0Qo1efgh") +INST(asimd_VEXT, "VEXT", "111100101D11nnnnddddiiiiNQM0mmmm") +INST(asimd_VTBL, "VTBL", "111100111D11nnnndddd10zzN0M0mmmm") +INST(asimd_VTBX, "VTBX", "111100111D11nnnndddd10zzN1M0mmmm") +INST(asimd_VDUP_scalar, "VDUP (scalar)", "111100111D11iiiidddd11000QM0mmmm") +INST(asimd_VSWP, "VSWP", "111100111D110010dddd00000QM0mmmm") +INST(asimd_VMOVN, "VMOVN", "111100111D11zz10dddd001000M0mmmm") +INST(asimd_VQMOVUN, "VQMOVUN", "111100111D11zz10dddd001001M0mmmm") +INST(asimd_VSHLL_max, "VSHLL_max", "111100111D11zz10dddd001100M0mmmm") +INST(v8_AESE, "AESE", "111100111D11zz00dddd001100M0mmmm") +INST(v8_AESD, "AESD", "111100111D11zz00dddd001101M0mmmm") +INST(v8_AESMC, "AESMC", "111100111D11zz00dddd001110M0mmmm") +INST(v8_AESIMC, "AESIMC", "111100111D11zz00dddd001111M0mmmm") +INST(arm_UDF, "UNALLOCATED", "111100111-11--01----001010-0----") +INST(arm_UDF, "UNALLOCATED (SHA1H)", "111100111-11--01----001011-0----") +INST(arm_UDF, "UNALLOCATED (SHA1SU1)", "111100111-11--10----001110-0----") +INST(v8_SHA256SU0, "SHA256SU0", "111100111D11zz10dddd001111M0mmmm") +INST(asimd_VCLS, "VCLS", "111100111D11zz00dddd01000QM0mmmm") +INST(asimd_VCLZ, "VCLZ", "111100111D11zz00dddd01001QM0mmmm") +INST(asimd_VCNT, "VCNT", "111100111D11zz00dddd01010QM0mmmm") +INST(asimd_VMVN_reg, "VMVN_reg", "111100111D11zz00dddd01011QM0mmmm") +INST(asimd_VQABS, "VQABS", "111100111D11zz00dddd01110QM0mmmm") +INST(asimd_VQNEG, "VQNEG", "111100111D11zz00dddd01111QM0mmmm") +INST(arm_UDF, "UNALLOCATED", "111100111-11--01----01101--0----") +INST(arm_UDF, "UNALLOCATED", "111100111-11--10----00000--0----") +INST(asimd_VTRN, "VTRN", "111100111D11zz10dddd00001QM0mmmm") +INST(asimd_VUZP, "VUZP", "111100111D11zz10dddd00010QM0mmmm") +INST(asimd_VZIP, "VZIP", "111100111D11zz10dddd00011QM0mmmm") +INST(asimd_VQMOVN, "VQMOVN", "111100111D11zz10dddd00101oM0mmmm") +INST(v8_VRINTN, "VRINTN", "111100111D11zz10dddd01000QM0mmmm") 
+INST(v8_VRINTX, "VRINTX", "111100111D11zz10dddd01001QM0mmmm") +INST(v8_VRINTA, "VRINTA", "111100111D11zz10dddd01010QM0mmmm") +INST(v8_VRINTZ, "VRINTZ", "111100111D11zz10dddd01011QM0mmmm") +INST(v8_VRINTM, "VRINTM", "111100111D11zz10dddd01101QM0mmmm") +INST(v8_VRINTP, "VRINTP", "111100111D11zz10dddd01111QM0mmmm") +INST(asimd_VCVT_half, "VCVT (half-precision)", "111100111D11zz10dddd011o00M0mmmm") +INST(arm_UDF, "UNALLOCATED", "111100111-11--10----011-01-0----") +INST(asimd_VPADDL, "VPADDL", "111100111D11zz00dddd0010oQM0mmmm") +INST(asimd_VPADAL, "VPADAL", "111100111D11zz00dddd0110oQM0mmmm") +INST(asimd_VCGT_zero, "VCGT (zero)", "111100111D11zz01dddd0F000QM0mmmm") +INST(asimd_VCGE_zero, "VCGE (zero)", "111100111D11zz01dddd0F001QM0mmmm") +INST(asimd_VCEQ_zero, "VCEQ (zero)", "111100111D11zz01dddd0F010QM0mmmm") +INST(asimd_VCLE_zero, "VCLE (zero)", "111100111D11zz01dddd0F011QM0mmmm") +INST(asimd_VCLT_zero, "VCLT (zero)", "111100111D11zz01dddd0F100QM0mmmm") +INST(asimd_VABS, "VABS", "111100111D11zz01dddd0F110QM0mmmm") +INST(asimd_VNEG, "VNEG", "111100111D11zz01dddd0F111QM0mmmm") +INST(v8_VCVTA, "VCVTA", "111100111D11zz11dddd0000oQM0mmmm") +INST(v8_VCVTN, "VCVTN", "111100111D11zz11dddd0001oQM0mmmm") +INST(v8_VCVTP, "VCVTP", "111100111D11zz11dddd0010oQM0mmmm") +INST(v8_VCVTM, "VCVTM", "111100111D11zz11dddd0011oQM0mmmm") +INST(asimd_VRECPE, "VRECPE", "111100111D11zz11dddd010F0QM0mmmm") +INST(asimd_VRSQRTE, "VRSQRTE", "111100111D11zz11dddd010F1QM0mmmm") +INST(asimd_VREV, "VREV{16,32,64}", "111100111D11zz00dddd000ooQM0mmmm") +INST(asimd_VCVT_integer, "VCVT (integer)", "111100111D11zz11dddd011oUQM0mmmm") +INST(asimd_VAND_reg, "VAND (register)", "111100100D00nnnndddd0001NQM1mmmm") +INST(asimd_VBIC_reg, "VBIC (register)", "111100100D01nnnndddd0001NQM1mmmm") +INST(asimd_VORR_reg, "VORR (register)", "111100100D10nnnndddd0001NQM1mmmm") +INST(asimd_VORN_reg, "VORN (register)", "111100100D11nnnndddd0001NQM1mmmm") +INST(asimd_VEOR_reg, "VEOR (register)", 
"111100110D00nnnndddd0001NQM1mmmm") +INST(asimd_VBSL, "VBSL", "111100110D01nnnndddd0001NQM1mmmm") +INST(asimd_VBIT, "VBIT", "111100110D10nnnndddd0001NQM1mmmm") +INST(asimd_VBIF, "VBIF", "111100110D11nnnndddd0001NQM1mmmm") +INST(v8_SHA256H, "SHA256H", "111100110D00nnnndddd1100NQM0mmmm") +INST(v8_SHA256H2, "SHA256H2", "111100110D01nnnndddd1100NQM0mmmm") +INST(v8_SHA256SU1, "SHA256SU1", "111100110D10nnnndddd1100NQM0mmmm") +INST(asimd_VSHRN, "VSHRN", "111100101Diiiiiidddd100000M1mmmm") +INST(asimd_VRSHRN, "VRSHRN", "111100101Diiiiiidddd100001M1mmmm") +INST(asimd_VQSHRUN, "VQSHRUN", "111100111Diiiiiidddd100000M1mmmm") +INST(asimd_VQRSHRUN, "VQRSHRUN", "111100111Diiiiiidddd100001M1mmmm") +INST(v8_VMAXNM, "VMAXNM", "111100110D0znnnndddd1111NQM1mmmm") +INST(v8_VMINNM, "VMINNM", "111100110D1znnnndddd1111NQM1mmmm") +INST(asimd_VFMA, "VFMA", "111100100D0znnnndddd1100NQM1mmmm") +INST(asimd_VFMS, "VFMS", "111100100D1znnnndddd1100NQM1mmmm") +INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") +INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") +INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") +INST(asimd_VABD_float, "VABD (floating-point)", "111100110D1znnnndddd1101NQM0mmmm") +INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100D0znnnndddd1101NQM1mmmm") +INST(asimd_VMLS_float, "VMLS (floating-point)", "111100100D1znnnndddd1101NQM1mmmm") +INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") +INST(asimd_VCEQ_reg_float, "VCEQ (register)", "111100100D0znnnndddd1110NQM0mmmm") +INST(asimd_VCGE_reg_float, "VCGE (register)", "111100110D0znnnndddd1110NQM0mmmm") +INST(asimd_VCGT_reg_float, "VCGT (register)", "111100110D1znnnndddd1110NQM0mmmm") +INST(asimd_VMAX_float, "VMAX (floating-point)", "111100100D0znnnndddd1111NQM0mmmm") +INST(asimd_VMIN_float, "VMIN (floating-point)", "111100100D1znnnndddd1111NQM0mmmm") +INST(asimd_VPMAX_float, "VPMAX 
(floating-point)", "111100110D0znnnndddd1111NQM0mmmm") +INST(asimd_VPMIN_float, "VPMIN (floating-point)", "111100110D1znnnndddd1111NQM0mmmm") +INST(asimd_VRECPS, "VRECPS", "111100100D0znnnndddd1111NQM1mmmm") +INST(asimd_VRSQRTS, "VRSQRTS", "111100100D1znnnndddd1111NQM1mmmm") +INST(asimd_VQSHRN, "VQSHRN", "1111001U1Diiiiiidddd100100M1mmmm") +INST(asimd_VQRSHRN, "VQRSHRN", "1111001U1Diiiiiidddd100101M1mmmm") +INST(asimd_VSHLL, "VSHLL", "1111001U1Diiiiiidddd101000M1mmmm") +INST(asimd_VADD_int, "VADD (integer)", "111100100Dzznnnndddd1000NQM0mmmm") +INST(asimd_VSUB_int, "VSUB (integer)", "111100110Dzznnnndddd1000NQM0mmmm") +INST(asimd_VTST, "VTST", "111100100Dzznnnndddd1000NQM1mmmm") +INST(asimd_VCEQ_reg, "VCEG (register)", "111100110Dzznnnndddd1000NQM1mmmm") +INST(asimd_VQDMULH, "VQDMULH", "111100100Dzznnnndddd1011NQM0mmmm") +INST(asimd_VQRDMULH, "VQRDMULH", "111100110Dzznnnndddd1011NQM0mmmm") +INST(asimd_VPADD, "VPADD", "111100100Dzznnnndddd1011NQM1mmmm") +INST(asimd_VACGE, "VACGE", "111100110Doznnnndddd1110NQM1mmmm") +INST(asimd_VABAL, "VABAL", "1111001U1Dzznnnndddd0101N0M0mmmm") +INST(asimd_VABDL, "VABDL", "1111001U1Dzznnnndddd0111N0M0mmmm") +INST(asimd_VSRI, "VSRI", "111100111Diiiiiidddd0100LQM1mmmm") +INST(asimd_VSHL, "VSHL", "111100101Diiiiiidddd0101LQM1mmmm") +INST(asimd_VSLI, "VSLI", "111100111Diiiiiidddd0101LQM1mmmm") +INST(arm_UDF, "UNALLOCATED", "111100111-11--------11-----0----") +INST(arm_UDF, "UNALLOCATED", "111101000--0--------1011--------") +INST(asimd_VHADD, "VHADD", "1111001U0Dzznnnndddd0000NQM0mmmm") +INST(asimd_VQADD, "VQADD", "1111001U0Dzznnnndddd0000NQM1mmmm") +INST(asimd_VRHADD, "VRHADD", "1111001U0Dzznnnndddd0001NQM0mmmm") +INST(asimd_VHSUB, "VHSUB", "1111001U0Dzznnnndddd0010NQM0mmmm") +INST(asimd_VQSUB, "VQSUB", "1111001U0Dzznnnndddd0010NQM1mmmm") +INST(asimd_VCGT_reg, "VCGT (register)", "1111001U0Dzznnnndddd0011NQM0mmmm") +INST(asimd_VCGE_reg, "VCGE (register)", "1111001U0Dzznnnndddd0011NQM1mmmm") +INST(asimd_VSHL_reg, "VSHL (register)", 
"1111001U0Dzznnnndddd0100NQM0mmmm") +INST(asimd_VQSHL_reg, "VQSHL (register)", "1111001U0Dzznnnndddd0100NQM1mmmm") +INST(asimd_VRSHL, "VRSHL", "1111001U0Dzznnnndddd0101NQM0mmmm") +INST(asimd_VABD, "VABD", "1111001U0Dzznnnndddd0111NQM0mmmm") +INST(asimd_VABA, "VABA", "1111001U0Dzznnnndddd0111NQM1mmmm") +INST(asimd_VMLA, "VMLA/VMLS", "1111001o0Dzznnnndddd1001NQM0mmmm") +INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd1001NQM1mmmm") +INST(asimd_VADDL, "VADDL/VADDW", "1111001U1Dzznnnndddd000oN0M0mmmm") +INST(asimd_VSUBL, "VSUBL/VSUBW", "1111001U1Dzznnnndddd001oN0M0mmmm") +INST(asimd_VMLAL, "VMLAL/VMLSL", "1111001U1Dzznnnndddd10o0N0M0mmmm") +INST(asimd_VMULL, "VMULL", "1111001U1Dzznnnndddd11P0N0M0mmmm") +INST(asimd_SHR, "SHR", "1111001U1Diiiiiidddd0000LQM1mmmm") +INST(asimd_SRA, "SRA", "1111001U1Diiiiiidddd0001LQM1mmmm") +INST(asimd_VRSHR, "VRSHR", "1111001U1Diiiiiidddd0010LQM1mmmm") +INST(asimd_VRSRA, "VRSRA", "1111001U1Diiiiiidddd0011LQM1mmmm") +INST(asimd_VCVT_fixed, "VCVT (fixed-point)", "1111001U1Diiiiiidddd111o0QM1mmmm") +INST(arm_UDF, "UNALLOCATED", "111101001-00--------11----------") +INST(v8_VLD_all_lanes, "VLD{1-4} (all lanes)", "111101001D10nnnndddd11nnzzTammmm") +INST(asimd_VMAX, "VMAX/VMIN (integer)", "1111001U0Dzznnnnmmmm0110NQMommmm") +INST(asimd_VPMAX_int, "VPMAX/VPMIN (integer)", "1111001U0Dzznnnndddd1010NQMommmm") +INST(asimd_VQSHL, "VQSHL", "1111001U1Diiiiiidddd011oLQM1mmmm") +INST(arm_UDF, "UNALLOCATED", "111101000--0--------11----------") +INST(v8_VST_multiple, "VST{1-4} (multiple)", "111101000D00nnnnddddxxxxzzaammmm") +INST(v8_VLD_multiple, "VLD{1-4} (multiple)", "111101000D10nnnnddddxxxxzzaammmm") +INST(v8_VST_single, "VST{1-4} (single)", "111101001D00nnnnddddzzNNaaaammmm") +INST(v8_VLD_single, "VLD{1-4} (single)", "111101001D10nnnnddddzzNNaaaammmm") +INST(asimd_VMLA_scalar, "VMLA (scalar)", "1111001Q1Dzznnnndddd0o0FN1M0mmmm") +INST(asimd_VMLAL_scalar, "VMLAL (scalar)", "1111001U1dzznnnndddd0o10N1M0mmmm") +INST(asimd_VMUL_scalar, "VMUL (scalar)", 
"1111001Q1Dzznnnndddd100FN1M0mmmm") +INST(asimd_VMULL_scalar, "VMULL (scalar)", "1111001U1Dzznnnndddd1010N1M0mmmm") +INST(asimd_VQDMULL_scalar, "VQDMULL (scalar)", "111100101Dzznnnndddd1011N1M0mmmm") +INST(asimd_VQDMULH_scalar, "VQDMULH (scalar)", "1111001Q1Dzznnnndddd1100N1M0mmmm") +INST(asimd_VQRDMULH_scalar, "VQRDMULH (scalar)", "1111001Q1Dzznnnndddd1101N1M0mmmm") diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 16b99ba5aa..eae296f59c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -24,8 +24,8 @@ template using Thumb16Matcher = Decoder::Matcher; template -std::optional>> DecodeThumb16(u16 instruction) { - alignas(64) static const std::vector> table = { +static std::optional>> DecodeThumb16(u16 instruction) { + alignas(64) static const auto table = std::array{ #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST @@ -37,7 +37,7 @@ std::optional>> DecodeThumb16(u16 } template -std::optional GetNameThumb16(u32 inst) noexcept { +static std::optional GetNameThumb16(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, #include "./thumb16.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 19418de67c..d82aef73fa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ 
b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. @@ -23,8 +23,8 @@ template using Thumb32Matcher = Decoder::Matcher; template -std::optional>> DecodeThumb32(u32 instruction) { - alignas(64) static const std::vector> table = { +static std::optional>> DecodeThumb32(u32 instruction) { + alignas(64) static const auto table = std::array{ #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST @@ -36,7 +36,7 @@ std::optional>> DecodeThumb32(u32 } template -std::optional GetNameThumb32(u32 inst) noexcept { +static std::optional GetNameThumb32(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./thumb32.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a346304a9a..f1728e452b 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. 
@@ -24,7 +24,7 @@ template using VFPMatcher = Decoder::Matcher; template -std::optional>> DecodeVFP(u32 instruction) { +static std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; alignas(64) static const struct Tables { Table unconditional; @@ -52,7 +52,7 @@ std::optional>> DecodeVFP(u32 instruc } template -std::optional GetNameVFP(u32 inst) noexcept { +static std::optional GetNameVFP(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./vfp.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index eba9d73942..533a93f3aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -36,33 +36,19 @@ inline size_t ToFastLookupIndex(u32 instruction) { } // namespace detail template -constexpr DecodeTable GetDecodeTable() { - std::vector>> list = { -#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, -#include "./a64.inc" -#undef INST - }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); - }); - // Exceptions to the above rule of thumb. 
- std::stable_partition(list.begin(), list.end(), [&](const auto& e) { - return std::set{ - "MOVI, MVNI, ORR, BIC (vector, immediate)", - "FMOV (vector, immediate)", - "Unallocated SIMD modified immediate", - }.count(e.first) > 0; - }); +inline DecodeTable GetDecodeTable() { DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto const& e : list) { - const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); - const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); - if ((i & mask) == expect) { - table[i].push_back(e.second); - } + // PLEASE HEAP ELLIDE + for (auto const& e : std::vector>{ +#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), +#include "./a64.inc" +#undef INST + }) { + const auto expect = detail::ToFastLookupIndex(e.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.GetMask()); + if ((i & mask) == expect) + table[i].push_back(e); } } return table; @@ -70,7 +56,7 @@ constexpr DecodeTable GetDecodeTable() { /// In practice it must always suceed, otherwise something else unrelated would have gone awry template -std::optional>> Decode(u32 instruction) { +inline std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { @@ -82,7 +68,7 @@ std::optional>> Decode(u32 instruction) } template -std::optional GetName(u32 inst) noexcept { +inline std::optional GetName(u32 inst) noexcept { std::vector>> list = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.inc b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.inc index 23f8b71933..8a3fab7ac1 100644 --- 
a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.inc +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.inc @@ -1,1029 +1,647 @@ -// Data processing - Immediate - PC relative addressing -INST(ADR, "ADR", "0ii10000iiiiiiiiiiiiiiiiiiiddddd") -INST(ADRP, "ADRP", "1ii10000iiiiiiiiiiiiiiiiiiiddddd") - -// Data processing - Immediate - Add/Sub (with tags) -//INST(ADDG, "ADDG", "1001000110iiiiii00IIIInnnnnddddd") // ARMv8.5 -//INST(SUBG, "SUBG", "1101000110iiiiii00IIIInnnnnddddd") // ARMv8.5 - -// Data processing - Immediate - Add/Sub -INST(ADD_imm, "ADD (immediate)", "z0010001ssiiiiiiiiiiiinnnnnddddd") -INST(ADDS_imm, "ADDS (immediate)", "z0110001ssiiiiiiiiiiiinnnnnddddd") -INST(SUB_imm, "SUB (immediate)", "z1010001ssiiiiiiiiiiiinnnnnddddd") -INST(SUBS_imm, "SUBS (immediate)", "z1110001ssiiiiiiiiiiiinnnnnddddd") - -// Data processing - Immediate - Logical -INST(AND_imm, "AND (immediate)", "z00100100Nrrrrrrssssssnnnnnddddd") -INST(ORR_imm, "ORR (immediate)", "z01100100Nrrrrrrssssssnnnnnddddd") -INST(EOR_imm, "EOR (immediate)", "z10100100Nrrrrrrssssssnnnnnddddd") -INST(ANDS_imm, "ANDS (immediate)", "z11100100Nrrrrrrssssssnnnnnddddd") - -// Data processing - Immediate - Move Wide -INST(MOVN, "MOVN", "z00100101ssiiiiiiiiiiiiiiiiddddd") -INST(MOVZ, "MOVZ", "z10100101ssiiiiiiiiiiiiiiiiddddd") -INST(MOVK, "MOVK", "z11100101ssiiiiiiiiiiiiiiiiddddd") - -// Data processing - Immediate - Bitfield -INST(SBFM, "SBFM", "z00100110Nrrrrrrssssssnnnnnddddd") -INST(BFM, "BFM", "z01100110Nrrrrrrssssssnnnnnddddd") -INST(UBFM, "UBFM", "z10100110Nrrrrrrssssssnnnnnddddd") -INST(ASR_1, "ASR (immediate, 32-bit)", "00010011000rrrrr011111nnnnnddddd") -INST(ASR_2, "ASR (immediate, 64-bit)", "1001001101rrrrrr111111nnnnnddddd") -INST(SXTB_1, "SXTB (32-bit)", "0001001100000000000111nnnnnddddd") -INST(SXTB_2, "SXTB (64-bit)", "1001001101000000000111nnnnnddddd") -INST(SXTH_1, "SXTH (32-bit)", "0001001100000000001111nnnnnddddd") -INST(SXTH_2, "SXTH (64-bit)", "1001001101000000001111nnnnnddddd") 
-INST(SXTW, "SXTW", "1001001101000000011111nnnnnddddd") - -// Data processing - Immediate - Extract -INST(EXTR, "EXTR", "z00100111N0mmmmmssssssnnnnnddddd") - -// Conditional branch -INST(B_cond, "B.cond", "01010100iiiiiiiiiiiiiiiiiii0cccc") - -// Exception generation -INST(SVC, "SVC", "11010100000iiiiiiiiiiiiiiii00001") -//INST(HVC, "HVC", "11010100000iiiiiiiiiiiiiiii00010") -//INST(SMC, "SMC", "11010100000iiiiiiiiiiiiiiii00011") -INST(BRK, "BRK", "11010100001iiiiiiiiiiiiiiii00000") -//INST(HLT, "HLT", "11010100010iiiiiiiiiiiiiiii00000") -//INST(DCPS1, "DCPS1", "11010100101iiiiiiiiiiiiiiii00001") -//INST(DCPS2, "DCPS2", "11010100101iiiiiiiiiiiiiiii00010") -//INST(DCPS3, "DCPS3", "11010100101iiiiiiiiiiiiiiii00011") - -// System -//INST(MSR_imm, "MSR (immediate)", "1101010100000ooo0100MMMMooo11111") -INST(HINT, "HINT", "11010101000000110010MMMMooo11111") -INST(NOP, "NOP", "11010101000000110010000000011111") -INST(YIELD, "YIELD", "11010101000000110010000000111111") -INST(WFE, "WFE", "11010101000000110010000001011111") -INST(WFI, "WFI", "11010101000000110010000001111111") -INST(SEV, "SEV", "11010101000000110010000010011111") -INST(SEVL, "SEVL", "11010101000000110010000010111111") -//INST(DGH, "DGH", "11010101000000110010000011011111") // v8.6 -//INST(WFET, "WFET", "110101010000001100010000000ddddd") // v8.7 -//INST(WFIT, "WFIT", "110101010000001100010000001ddddd") // v8.7 -//INST(XPAC_1, "XPACD, XPACI, XPACLRI", "110110101100000101000D11111ddddd") -//INST(XPAC_2, "XPACD, XPACI, XPACLRI", "11010101000000110010000011111111") -//INST(PACIA_1, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "110110101100000100Z000nnnnnddddd") -//INST(PACIA_2, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "1101010100000011001000-100-11111") -//INST(PACIB_1, "PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB", "110110101100000100Z001nnnnnddddd") -//INST(PACIB_2, "PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB", "1101010100000011001000-101-11111") -//INST(AUTIA_1, "AUTIA, AUTIA1716, AUTIASP, AUTIAZ, AUTIZA", 
"110110101100000100Z100nnnnnddddd") -//INST(AUTIA_2, "AUTIA, AUTIA1716, AUTIASP, AUTIAZ, AUTIZA", "1101010100000011001000-110-11111") -//INST(AUTIB_1, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "110110101100000100Z101nnnnnddddd") -//INST(AUTIB_2, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "1101010100000011001000-111-11111") -//INST(BTI, "BTI", "110101010000001100100100ii011111") // ARMv8.5 -//INST(ESB, "ESB", "11010101000000110010001000011111") -//INST(PSB, "PSB CSYNC", "11010101000000110010001000111111") -//INST(TSB, "TSB CSYNC", "11010101000000110010001001011111") // ARMv8.5 -//INST(CSDB, "CSDB", "11010101000000110010001010011111") -INST(CLREX, "CLREX", "11010101000000110011MMMM01011111") -INST(DSB, "DSB", "11010101000000110011MMMM10011111") -//INST(SSBB, "SSBB", "11010101000000110011000010011111") -//INST(PSSBB, "PSSBB", "11010101000000110011010010011111") -INST(DMB, "DMB", "11010101000000110011MMMM10111111") -INST(ISB, "ISB", "11010101000000110011MMMM11011111") -//INST(SB, "SB", "11010101000000110011000011111111") -//INST(SYS, "SYS", "1101010100001oooNNNNMMMMooottttt") -INST(MSR_reg, "MSR (register)", "110101010001poooNNNNMMMMooottttt") -//INST(SYSL, "SYSL", "1101010100101oooNNNNMMMMooottttt") -INST(MRS, "MRS", "110101010011poooNNNNMMMMooottttt") - -// System - Flag manipulation instructions -INST(CFINV, "CFINV", "11010101000000000100000000011111") // ARMv8.4 -INST(RMIF, "RMIF", "10111010000iiiiii00001nnnnn0IIII") // ARMv8.4 -//INST(SETF8, "SETF8", "0011101000000000000010nnnnn01101") // ARMv8.4 -//INST(SETF16, "SETF16", "0011101000000000010010nnnnn01101") // ARMv8.4 - -// System - Flag format instructions -INST(XAFlag, "XAFlag", "11010101000000000100000000111111") // ARMv8.5 -INST(AXFlag, "AXFlag", "11010101000000000100000001011111") // ARMv8.5 - -// SYS: Data Cache -INST(DC_IVAC, "DC IVAC", "110101010000100001110110001ttttt") -INST(DC_ISW, "DC ISW", "110101010000100001110110010ttttt") -INST(DC_CSW, "DC CSW", "110101010000100001111010010ttttt") 
-INST(DC_CISW, "DC CISW", "110101010000100001111110010ttttt") -INST(DC_ZVA, "DC ZVA", "110101010000101101110100001ttttt") -INST(DC_CVAC, "DC CVAC", "110101010000101101111010001ttttt") -INST(DC_CVAU, "DC CVAU", "110101010000101101111011001ttttt") -INST(DC_CVAP, "DC CVAP", "110101010000101101111100001ttttt") -INST(DC_CIVAC, "DC CIVAC", "110101010000101101111110001ttttt") - -// SYS: Instruction Cache -INST(IC_IALLU, "IC IALLU", "11010101000010000111010100011111") -INST(IC_IALLUIS, "IC IALLUIS", "11010101000010000111000100011111") -INST(IC_IVAU, "IC IVAU", "110101010000101101110101001ttttt") - -// Unconditional branch (Register) -INST(BLR, "BLR", "1101011000111111000000nnnnn00000") -INST(BR, "BR", "1101011000011111000000nnnnn00000") -//INST(DRPS, "DRPS", "11010110101111110000001111100000") -//INST(ERET, "ERET", "11010110100111110000001111100000") -INST(RET, "RET", "1101011001011111000000nnnnn00000") -//INST(BLRA, "BLRAA, BLRAAZ, BLRAB, BLRABZ", "1101011Z0011111100001Mnnnnnmmmmm") // ARMv8.3 -//INST(BRA, "BRAA, BRAAZ, BRAB, BRABZ", "1101011Z0001111100001Mnnnnnmmmmm") // ARMv8.3 -//INST(ERETA, "ERETAA, ERETAB", "110101101001111100001M1111111111") // ARMv8.3 -//INST(RETA, "RETAA, RETAB", "110101100101111100001M1111111111") // ARMv8.3 - -// Unconditional branch (immediate) -INST(B_uncond, "B", "000101iiiiiiiiiiiiiiiiiiiiiiiiii") -INST(BL, "BL", "100101iiiiiiiiiiiiiiiiiiiiiiiiii") - -// Compare and branch (immediate) -INST(CBZ, "CBZ", "z0110100iiiiiiiiiiiiiiiiiiittttt") -INST(CBNZ, "CBNZ", "z0110101iiiiiiiiiiiiiiiiiiittttt") -INST(TBZ, "TBZ", "b0110110bbbbbiiiiiiiiiiiiiittttt") -INST(TBNZ, "TBNZ", "b0110111bbbbbiiiiiiiiiiiiiittttt") - -// Loads and stores - Advanced SIMD Load/Store multiple structures -INST(STx_mult_1, "STx (multiple structures)", "0Q00110000000000oooozznnnnnttttt") -INST(STx_mult_2, "STx (multiple structures)", "0Q001100100mmmmmoooozznnnnnttttt") -INST(LDx_mult_1, "LDx (multiple structures)", "0Q00110001000000oooozznnnnnttttt") -INST(LDx_mult_2, "LDx 
(multiple structures)", "0Q001100110mmmmmoooozznnnnnttttt") - -// Loads and stores - Advanced SIMD Load/Store single structures -INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000oo0Szznnnnnttttt") -INST(ST1_sngl_2, "ST1 (single structure)", "0Q001101100mmmmmoo0Szznnnnnttttt") -INST(ST3_sngl_1, "ST3 (single structure)", "0Q00110100000000oo1Szznnnnnttttt") -INST(ST3_sngl_2, "ST3 (single structure)", "0Q001101100mmmmmoo1Szznnnnnttttt") -INST(ST2_sngl_1, "ST2 (single structure)", "0Q00110100100000oo0Szznnnnnttttt") -INST(ST2_sngl_2, "ST2 (single structure)", "0Q001101101mmmmmoo0Szznnnnnttttt") -INST(ST4_sngl_1, "ST4 (single structure)", "0Q00110100100000oo1Szznnnnnttttt") -INST(ST4_sngl_2, "ST4 (single structure)", "0Q001101101mmmmmoo1Szznnnnnttttt") -INST(LD1_sngl_1, "LD1 (single structure)", "0Q00110101000000oo0Szznnnnnttttt") -INST(LD1_sngl_2, "LD1 (single structure)", "0Q001101110mmmmmoo0Szznnnnnttttt") -INST(LD3_sngl_1, "LD3 (single structure)", "0Q00110101000000oo1Szznnnnnttttt") -INST(LD3_sngl_2, "LD3 (single structure)", "0Q001101110mmmmmoo1Szznnnnnttttt") -INST(LD1R_1, "LD1R", "0Q001101010000001100zznnnnnttttt") -INST(LD1R_2, "LD1R", "0Q001101110mmmmm1100zznnnnnttttt") -INST(LD3R_1, "LD3R", "0Q001101010000001110zznnnnnttttt") -INST(LD3R_2, "LD3R", "0Q001101110mmmmm1110zznnnnnttttt") -INST(LD2_sngl_1, "LD2 (single structure)", "0Q00110101100000oo0Szznnnnnttttt") -INST(LD2_sngl_2, "LD2 (single structure)", "0Q001101111mmmmmoo0Szznnnnnttttt") -INST(LD4_sngl_1, "LD4 (single structure)", "0Q00110101100000oo1Szznnnnnttttt") -INST(LD4_sngl_2, "LD4 (single structure)", "0Q001101111mmmmmoo1Szznnnnnttttt") -INST(LD2R_1, "LD2R", "0Q001101011000001100zznnnnnttttt") -INST(LD2R_2, "LD2R", "0Q001101111mmmmm1100zznnnnnttttt") -INST(LD4R_1, "LD4R", "0Q001101011000001110zznnnnnttttt") -INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt") - -// Loads and stores - Load/Store Exclusive -INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt") 
-INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt") -INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt") -INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt") -INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt") -INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt") -INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt") -INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt") -INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt") -INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt") -INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt") -INST(LDAR, "LDARB, LDARH, LDAR", "zz00100011011111111111nnnnnttttt") -//INST(CASP, "CASP, CASPA, CASPAL, CASPL", "0z0010000L1sssssp11111nnnnnttttt") // ARMv8.1 -//INST(CASB, "CASB, CASAB, CASALB, CASLB", "000010001L1sssssp11111nnnnnttttt") // ARMv8.1 -//INST(CASH, "CASH, CASAH, CASALH, CASLH", "010010001L1sssssp11111nnnnnttttt") // ARMv8.1 -//INST(CAS, "CAS, CASA, CASAL, CASL", "1z0010001L1sssssp11111nnnnnttttt") // ARMv8.1 - -// Loads and stores - Load register (literal) -INST(LDR_lit_gen, "LDR (literal)", "0z011000iiiiiiiiiiiiiiiiiiittttt") -INST(LDRSW_lit, "LDRSW (literal)", "10011000iiiiiiiiiiiiiiiiiiittttt") -INST(PRFM_lit, "PRFM (literal)", "11011000iiiiiiiiiiiiiiiiiiittttt") -INST(LDR_lit_fpsimd, "LDR (literal, SIMD&FP)", "oo011100iiiiiiiiiiiiiiiiiiittttt") - -// Loads and stores - Load/Store no-allocate pair -INST(STNP_LDNP_gen, "STNP/LDNP", "o01010000Liiiiiiiuuuuunnnnnttttt") -INST(STNP_LDNP_fpsimd, "STNP/LDNP (SIMD&FP)", "oo1011000Liiiiiiiuuuuunnnnnttttt") - -// Loads and stores - Load/Store register pair -INST(STP_LDP_gen, "STP/LDP", "oo10100pwLiiiiiiiuuuuunnnnnttttt") -INST(UnallocatedEncoding, "", "--1010000-----------------------") -INST(STP_LDP_fpsimd, "STP/LDP (SIMD&FP)", "oo10110pwLiiiiiiiuuuuunnnnnttttt") -INST(UnallocatedEncoding, "", "--1011000-----------------------") - -// Loads and stores - 
Load/Store register (unscaled immediate) -INST(STURx_LDURx, "STURx/LDURx", "zz111000oo0iiiiiiiii00nnnnnttttt") -INST(UnallocatedEncoding, "", "111110001-0---------00----------") -INST(UnallocatedEncoding, "", "10111000110---------00----------") -INST(PRFM_imm, "PRFM (immediate)", "1111100110iiiiiiiiiiiinnnnnttttt") -INST(PRFM_unscaled_imm, "PRFM (unscaled offset)", "11111000100iiiiiiiii00nnnnnttttt") -INST(STUR_fpsimd, "STUR (SIMD&FP)", "zz111100o00iiiiiiiii00nnnnnttttt") -INST(LDUR_fpsimd, "LDUR (SIMD&FP)", "zz111100o10iiiiiiiii00nnnnnttttt") - -// Loads and stores - Load/Store register (immediate pre/post-indexed) -INST(STRx_LDRx_imm_1, "STRx/LDRx (immediate)", "zz111000oo0iiiiiiiiip1nnnnnttttt") -INST(STRx_LDRx_imm_2, "STRx/LDRx (immediate)", "zz111001ooiiiiiiiiiiiinnnnnttttt") -INST(UnallocatedEncoding, "", "111110001-0----------1----------") -INST(UnallocatedEncoding, "", "10111000110----------1----------") -INST(UnallocatedEncoding, "", "1111100111----------------------") -INST(UnallocatedEncoding, "", "1011100111----------------------") -INST(STR_imm_fpsimd_1, "STR (immediate, SIMD&FP)", "zz111100o00iiiiiiiiip1nnnnnttttt") -INST(STR_imm_fpsimd_2, "STR (immediate, SIMD&FP)", "zz111101o0iiiiiiiiiiiinnnnnttttt") -INST(LDR_imm_fpsimd_1, "LDR (immediate, SIMD&FP)", "zz111100o10iiiiiiiiip1nnnnnttttt") -INST(LDR_imm_fpsimd_2, "LDR (immediate, SIMD&FP)", "zz111101o1iiiiiiiiiiiinnnnnttttt") -//INST(STGP_1, "STGP (post-index)", "0110100010iiiiiiimmmmmnnnnnttttt") // ARMv8.5 -//INST(STGP_2, "STGP (pre-index)", "0110100110iiiiiiimmmmmnnnnnttttt") // ARMv8.5 -//INST(STGP_3, "STGP (signed-offset)", "0110100100iiiiiiimmmmmnnnnnttttt") // ARMv8.5 - -// Loads and stores - Load/Store register (unprivileged) -INST(STTRB, "STTRB", "00111000000iiiiiiiii10nnnnnttttt") -INST(LDTRB, "LDTRB", "00111000010iiiiiiiii10nnnnnttttt") -INST(LDTRSB, "LDTRSB", "00111000oo0iiiiiiiii10nnnnnttttt") -INST(STTRH, "STTRH", "01111000000iiiiiiiii10nnnnnttttt") -INST(LDTRH, "LDTRH", 
"01111000010iiiiiiiii10nnnnnttttt") -INST(LDTRSH, "LDTRSH", "01111000oo0iiiiiiiii10nnnnnttttt") -INST(STTR, "STTR", "zz111000000iiiiiiiii10nnnnnttttt") -INST(LDTR, "LDTR", "zz111000010iiiiiiiii10nnnnnttttt") -INST(LDTRSW, "LDTRSW", "10111000100iiiiiiiii10nnnnnttttt") - -// Loads and stores - Atomic memory options -//INST(LDADDB, "LDADDB, LDADDAB, LDADDALB, LDADDLB", "00111000AR1sssss000000nnnnnttttt") -//INST(LDCLRB, "LDCLRB, LDCLRAB, LDCLRALB, LDCLRLB", "00111000AR1sssss000100nnnnnttttt") -//INST(LDEORB, "LDEORB, LDEORAB, LDEORALB, LDEORLB", "00111000AR1sssss001000nnnnnttttt") -//INST(LDSETB, "LDSETB, LDSETAB, LDSETALB, LDSETLB", "00111000AR1sssss001100nnnnnttttt") -//INST(LDSMAXB, "LDSMAXB, LDSMAXAB, LDSMAXALB, LDSMAXLB", "00111000AR1sssss010000nnnnnttttt") -//INST(LDSMINB, "LDSMINB, LDSMINAB, LDSMINALB, LDSMINLB", "00111000AR1sssss010100nnnnnttttt") -//INST(LDUMAXB, "LDUMAXB, LDUMAXAB, LDUMAXALB, LDUMAXLB", "00111000AR1sssss011000nnnnnttttt") -//INST(LDUMINB, "LDUMINB, LDUMINAB, LDUMINALB, LDUMINLB", "00111000AR1sssss011100nnnnnttttt") -//INST(SWPB, "SWPB, SWPAB, SWPALB, SWPLB", "00111000AR1sssss100000nnnnnttttt") -//INST(LDAPRB, "LDAPRB", "0011100010111111110000nnnnnttttt") -//INST(LDADDH, "LDADDH, LDADDAH, LDADDALH, LDADDLH", "01111000AR1sssss000000nnnnnttttt") -//INST(LDCLRH, "LDCLRH, LDCLRAH, LDCLRALH, LDCLRLH", "01111000AR1sssss000100nnnnnttttt") -//INST(LDEORH, "LDEORH, LDEORAH, LDEORALH, LDEORLH", "01111000AR1sssss001000nnnnnttttt") -//INST(LDSETH, "LDSETH, LDSETAH, LDSETALH, LDSETLH", "01111000AR1sssss001100nnnnnttttt") -//INST(LDSMAXH, "LDSMAXH, LDSMAXAH, LDSMAXALH, LDSMAXLH", "01111000AR1sssss010000nnnnnttttt") -//INST(LDSMINH, "LDSMINH, LDSMINAH, LDSMINALH, LDSMINLH", "01111000AR1sssss010100nnnnnttttt") -//INST(LDUMAXH, "LDUMAXH, LDUMAXAH, LDUMAXALH, LDUMAXLH", "01111000AR1sssss011000nnnnnttttt") -//INST(LDUMINH, "LDUMINH, LDUMINAH, LDUMINALH, LDUMINLH", "01111000AR1sssss011100nnnnnttttt") -//INST(SWPH, "SWPH, SWPAH, SWPALH, SWPLH", 
"01111000AR1sssss100000nnnnnttttt") -//INST(LDAPRH, "LDAPRH", "0111100010111111110000nnnnnttttt") -//INST(LDADD, "LDADD, LDADDA, LDADDAL, LDADDL", "1-111000AR1sssss000000nnnnnttttt") -//INST(LDCLR, "LDCLR, LDCLRA, LDCLRAL, LDCLRL", "1-111000AR1sssss000100nnnnnttttt") -//INST(LDEOR, "LDEOR, LDEORA, LDEORAL, LDEORL", "1-111000AR1sssss001000nnnnnttttt") -//INST(LDSET, "LDSET, LDSETA, LDSETAL, LDSETL", "1-111000AR1sssss001100nnnnnttttt") -//INST(LDSMAX, "LDSMAX, LDSMAXA, LDSMAXAL, LDSMAXL", "1-111000AR1sssss010000nnnnnttttt") -//INST(LDSMIN, "LDSMIN, LDSMINA, LDSMINAL, LDSMINL", "1-111000AR1sssss010100nnnnnttttt") -//INST(LDUMAX, "LDUMAX, LDUMAXA, LDUMAXAL, LDUMAXL", "1-111000AR1sssss011000nnnnnttttt") -//INST(LDUMIN, "LDUMIN, LDUMINA, LDUMINAL, LDUMINL", "1-111000AR1sssss011100nnnnnttttt") -//INST(SWP, "SWP, SWPA, SWPAL, SWPL", "1-111000AR1sssss100000nnnnnttttt") -//INST(LDAPR, "LDAPR", "1-11100010111111110000nnnnnttttt") -//INST(LD64B, "LD64B", "1111100000111111110100nnnnnttttt") // v8.7 -//INST(ST64B, "ST64B", "1111100000111111100100nnnnnttttt") // v8.7 -//INST(ST64BV, "ST64BV", "11111000001sssss101100nnnnnttttt") // v8.7 -//INST(ST64BV0, "ST64BV0", "11111000001sssss101000nnnnnttttt") // v8.7 - -// Loads and stores - Load/Store register (register offset) -INST(STRx_reg, "STRx (register)", "zz111000o01mmmmmxxxS10nnnnnttttt") -INST(LDRx_reg, "LDRx (register)", "zz111000o11mmmmmxxxS10nnnnnttttt") -INST(STR_reg_fpsimd, "STR (register, SIMD&FP)", "zz111100o01mmmmmxxxS10nnnnnttttt") -INST(LDR_reg_fpsimd, "LDR (register, SIMD&FP)", "zz111100o11mmmmmxxxS10nnnnnttttt") - -// Loads and stores - Load/Store memory tags -//INST(STG_1, "STG (post-index)", "11011001001iiiiiiiii01nnnnn11111") // ARMv8.5 -//INST(STG_2, "STG (pre-index)", "11011001001iiiiiiiii11nnnnn11111") // ARMv8.5 -//INST(STG_3, "STG (signed-offset)", "11011001001iiiiiiiii10nnnnn11111") // ARMv8.5 -//INST(LDG, "LDG", "11011001011iiiiiiiii00nnnnnttttt") // ARMv8.5 -//INST(STZG_1, "STZG (post-index)", 
"11011001011iiiiiiiii01nnnnn11111") // ARMv8.5 -//INST(STZG_2, "STZG (pre-index)", "11011001011iiiiiiiii11nnnnn11111") // ARMv8.5 -//INST(STZG_3, "STZG (signed-offset)", "11011001011iiiiiiiii10nnnnn11111") // ARMv8.5 -//INST(ST2G_1, "ST2G (post-index)", "11011001101iiiiiiiii01nnnnn11111") // ARMv8.5 -//INST(ST2G_2, "ST2G (pre-index)", "11011001101iiiiiiiii11nnnnn11111") // ARMv8.5 -//INST(ST2G_3, "ST2G (signed-offset)", "11011001101iiiiiiiii10nnnnn11111") // ARMv8.5 -//INST(STGV, "STGV", "1101100110100000000000nnnnnttttt") // ARMv8.5 -//INST(STZ2G_1, "STZ2G (post-index)", "11011001111iiiiiiiii01nnnnn11111") // ARMv8.5 -//INST(STZ2G_2, "STZ2G (pre-index)", "11011001111iiiiiiiii11nnnnn11111") // ARMv8.5 -//INST(STZ2G_3, "STZ2G (signed-offset)", "11011001111iiiiiiiii10nnnnn11111") // ARMv8.5 -//INST(LDGV, "LDGV", "1101100111100000000000nnnnnttttt") // ARMv8.5 - -// Loads and stores - Load/Store register (pointer authentication) -//INST(LDRA, "LDRAA, LDRAB", "11111000MS1iiiiiiiiiW1nnnnnttttt") - -// Data Processing - Register - 2 source -INST(UDIV, "UDIV", "z0011010110mmmmm000010nnnnnddddd") -INST(SDIV, "SDIV", "z0011010110mmmmm000011nnnnnddddd") -INST(LSLV, "LSLV", "z0011010110mmmmm001000nnnnnddddd") -INST(LSRV, "LSRV", "z0011010110mmmmm001001nnnnnddddd") -INST(ASRV, "ASRV", "z0011010110mmmmm001010nnnnnddddd") -INST(RORV, "RORV", "z0011010110mmmmm001011nnnnnddddd") -INST(CRC32, "CRC32B, CRC32H, CRC32W, CRC32X", "z0011010110mmmmm0100zznnnnnddddd") -INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd") -//INST(PACGA, "PACGA", "10011010110mmmmm001100nnnnnddddd") -//INST(SUBP, "SUBP", "10011010110mmmmm000000nnnnnddddd") // ARMv8.5 -//INST(IRG, "IRG", "10011010110mmmmm000100nnnnnddddd") // ARMv8.5 -//INST(GMI, "GMI", "10011010110mmmmm000101nnnnnddddd") // ARMv8.5 -//INST(SUBPS, "SUBPS", "10111010110mmmmm000000nnnnnddddd") // ARMv8.5 - -// Data Processing - Register - 1 source -INST(RBIT_int, "RBIT", "z101101011000000000000nnnnnddddd") 
-INST(REV16_int, "REV16", "z101101011000000000001nnnnnddddd") -INST(REV, "REV", "z10110101100000000001onnnnnddddd") -INST(CLZ_int, "CLZ", "z101101011000000000100nnnnnddddd") -INST(CLS_int, "CLS", "z101101011000000000101nnnnnddddd") -INST(REV32_int, "REV32", "1101101011000000000010nnnnnddddd") -//INST(PACDA, "PACDA, PACDZA", "110110101100000100Z010nnnnnddddd") -//INST(PACDB, "PACDB, PACDZB", "110110101100000100Z011nnnnnddddd") -//INST(AUTDA, "AUTDA, AUTDZA", "110110101100000100Z110nnnnnddddd") -//INST(AUTDB, "AUTDB, AUTDZB", "110110101100000100Z111nnnnnddddd") - -// Data Processing - Register - Logical (shifted register) -INST(AND_shift, "AND (shifted register)", "z0001010ss0mmmmmiiiiiinnnnnddddd") -INST(BIC_shift, "BIC (shifted register)", "z0001010ss1mmmmmiiiiiinnnnnddddd") -INST(ORR_shift, "ORR (shifted register)", "z0101010ss0mmmmmiiiiiinnnnnddddd") -INST(ORN_shift, "ORN (shifted register)", "z0101010ss1mmmmmiiiiiinnnnnddddd") -INST(EOR_shift, "EOR (shifted register)", "z1001010ss0mmmmmiiiiiinnnnnddddd") -INST(EON, "EON (shifted register)", "z1001010ss1mmmmmiiiiiinnnnnddddd") -INST(ANDS_shift, "ANDS (shifted register)", "z1101010ss0mmmmmiiiiiinnnnnddddd") -INST(BICS, "BICS (shifted register)", "z1101010ss1mmmmmiiiiiinnnnnddddd") - -// Data Processing - Register - Add/Sub (shifted register) -INST(ADD_shift, "ADD (shifted register)", "z0001011ss0mmmmmiiiiiinnnnnddddd") -INST(ADDS_shift, "ADDS (shifted register)", "z0101011ss0mmmmmiiiiiinnnnnddddd") -INST(SUB_shift, "SUB (shifted register)", "z1001011ss0mmmmmiiiiiinnnnnddddd") -INST(SUBS_shift, "SUBS (shifted register)", "z1101011ss0mmmmmiiiiiinnnnnddddd") - -// Data Processing - Register - Add/Sub (shifted register) -INST(ADD_ext, "ADD (extended register)", "z0001011001mmmmmxxxiiinnnnnddddd") -INST(ADDS_ext, "ADDS (extended register)", "z0101011001mmmmmxxxiiinnnnnddddd") -INST(SUB_ext, "SUB (extended register)", "z1001011001mmmmmxxxiiinnnnnddddd") -INST(SUBS_ext, "SUBS (extended register)", 
"z1101011001mmmmmxxxiiinnnnnddddd") - -// Data Processing - Register - Add/Sub (with carry) -INST(ADC, "ADC", "z0011010000mmmmm000000nnnnnddddd") -INST(ADCS, "ADCS", "z0111010000mmmmm000000nnnnnddddd") -INST(SBC, "SBC", "z1011010000mmmmm000000nnnnnddddd") -INST(SBCS, "SBCS", "z1111010000mmmmm000000nnnnnddddd") - -// Data Processing - Register - Conditional compare -INST(CCMN_reg, "CCMN (register)", "z0111010010mmmmmcccc00nnnnn0ffff") -INST(CCMP_reg, "CCMP (register)", "z1111010010mmmmmcccc00nnnnn0ffff") -INST(CCMN_imm, "CCMN (immediate)", "z0111010010iiiiicccc10nnnnn0ffff") -INST(CCMP_imm, "CCMP (immediate)", "z1111010010iiiiicccc10nnnnn0ffff") - -// Data Processing - Register - Conditional select -INST(CSEL, "CSEL", "z0011010100mmmmmcccc00nnnnnddddd") -INST(CSINC, "CSINC", "z0011010100mmmmmcccc01nnnnnddddd") -INST(CSINV, "CSINV", "z1011010100mmmmmcccc00nnnnnddddd") -INST(CSNEG, "CSNEG", "z1011010100mmmmmcccc01nnnnnddddd") - -// Data Processing - Register - 3 source -INST(MADD, "MADD", "z0011011000mmmmm0aaaaannnnnddddd") -INST(MSUB, "MSUB", "z0011011000mmmmm1aaaaannnnnddddd") -INST(SMADDL, "SMADDL", "10011011001mmmmm0aaaaannnnnddddd") -INST(SMSUBL, "SMSUBL", "10011011001mmmmm1aaaaannnnnddddd") -INST(SMULH, "SMULH", "10011011010mmmmm011111nnnnnddddd") -INST(UMADDL, "UMADDL", "10011011101mmmmm0aaaaannnnnddddd") -INST(UMSUBL, "UMSUBL", "10011011101mmmmm1aaaaannnnnddddd") -INST(UMULH, "UMULH", "10011011110mmmmm011111nnnnnddddd") - -// Data Processing - FP and SIMD - AES -INST(AESE, "AESE", "0100111000101000010010nnnnnddddd") -INST(AESD, "AESD", "0100111000101000010110nnnnnddddd") -INST(AESMC, "AESMC", "0100111000101000011010nnnnnddddd") -INST(AESIMC, "AESIMC", "0100111000101000011110nnnnnddddd") - -// Data Processing - FP and SIMD - SHA -INST(SHA1C, "SHA1C", "01011110000mmmmm000000nnnnnddddd") -INST(SHA1P, "SHA1P", "01011110000mmmmm000100nnnnnddddd") -INST(SHA1M, "SHA1M", "01011110000mmmmm001000nnnnnddddd") -INST(SHA1SU0, "SHA1SU0", "01011110000mmmmm001100nnnnnddddd") 
-INST(SHA256H, "SHA256H", "01011110000mmmmm010000nnnnnddddd") -INST(SHA256H2, "SHA256H2", "01011110000mmmmm010100nnnnnddddd") -INST(SHA256SU1, "SHA256SU1", "01011110000mmmmm011000nnnnnddddd") -INST(SHA1H, "SHA1H", "0101111000101000000010nnnnnddddd") -INST(SHA1SU1, "SHA1SU1", "0101111000101000000110nnnnnddddd") -INST(SHA256SU0, "SHA256SU0", "0101111000101000001010nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar copy -INST(DUP_elt_1, "DUP (element)", "01011110000iiiii000001nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar three -//INST(FMULX_vec_1, "FMULX", "01011110010mmmmm000111nnnnnddddd") -INST(FMULX_vec_2, "FMULX", "010111100z1mmmmm110111nnnnnddddd") -INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd") -INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd") -INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd") -INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd") -INST(FRSQRTS_1, "FRSQRTS", "01011110110mmmmm001111nnnnnddddd") -INST(FRSQRTS_2, "FRSQRTS", "010111101z1mmmmm111111nnnnnddddd") -//INST(FCMGE_reg_1, "FCMGE (register)", "01111110010mmmmm001001nnnnnddddd") -INST(FCMGE_reg_2, "FCMGE (register)", "011111100z1mmmmm111001nnnnnddddd") -//INST(FACGE_1, "FACGE", "01111110010mmmmm001011nnnnnddddd") -INST(FACGE_2, "FACGE", "011111100z1mmmmm111011nnnnnddddd") -//INST(FABD_1, "FABD", "01111110110mmmmm000101nnnnnddddd") -INST(FABD_2, "FABD", "011111101z1mmmmm110101nnnnnddddd") -//INST(FCMGT_reg_1, "FCMGT (register)", "01111110110mmmmm001001nnnnnddddd") -INST(FCMGT_reg_2, "FCMGT (register)", "011111101z1mmmmm111001nnnnnddddd") -//INST(FACGT_1, "FACGT", "01111110110mmmmm001011nnnnnddddd") -INST(FACGT_2, "FACGT", "011111101z1mmmmm111011nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar two register misc -//INST(FCVTNS_1, "FCVTNS (vector)", "0101111001111001101010nnnnnddddd") -INST(FCVTNS_2, "FCVTNS (vector)", "010111100z100001101010nnnnnddddd") -//INST(FCVTMS_1, "FCVTMS (vector)", 
"0101111001111001101110nnnnnddddd") -INST(FCVTMS_2, "FCVTMS (vector)", "010111100z100001101110nnnnnddddd") -//INST(FCVTAS_1, "FCVTAS (vector)", "0101111001111001110010nnnnnddddd") -INST(FCVTAS_2, "FCVTAS (vector)", "010111100z100001110010nnnnnddddd") -//INST(SCVTF_int_1, "SCVTF (vector, integer)", "0101111001111001110110nnnnnddddd") -INST(SCVTF_int_2, "SCVTF (vector, integer)", "010111100z100001110110nnnnnddddd") -//INST(FCMGT_zero_1, "FCMGT (zero)", "0101111011111000110010nnnnnddddd") -INST(FCMGT_zero_2, "FCMGT (zero)", "010111101z100000110010nnnnnddddd") -INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd") -INST(FCMEQ_zero_2, "FCMEQ (zero)", "010111101z100000110110nnnnnddddd") -//INST(FCMLT_1, "FCMLT (zero)", "0101111011111000111010nnnnnddddd") -INST(FCMLT_2, "FCMLT (zero)", "010111101z100000111010nnnnnddddd") -//INST(FCVTPS_1, "FCVTPS (vector)", "0101111011111001101010nnnnnddddd") -INST(FCVTPS_2, "FCVTPS (vector)", "010111101z100001101010nnnnnddddd") -//INST(FCVTZS_int_1, "FCVTZS (vector, integer)", "0101111011111001101110nnnnnddddd") -INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd") -INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") -INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") -INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") -INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") -//INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd") -INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") -//INST(FCVTMU_1, "FCVTMU (vector)", "0111111001111001101110nnnnnddddd") -INST(FCVTMU_2, "FCVTMU (vector)", "011111100z100001101110nnnnnddddd") -//INST(FCVTAU_1, "FCVTAU (vector)", "0111111001111001110010nnnnnddddd") -INST(FCVTAU_2, "FCVTAU (vector)", "011111100z100001110010nnnnnddddd") -//INST(UCVTF_int_1, "UCVTF (vector, integer)", "0111111001111001110110nnnnnddddd") -INST(UCVTF_int_2, "UCVTF (vector, integer)", 
"011111100z100001110110nnnnnddddd") -//INST(FCMGE_zero_1, "FCMGE (zero)", "0111111011111000110010nnnnnddddd") -INST(FCMGE_zero_2, "FCMGE (zero)", "011111101z100000110010nnnnnddddd") -//INST(FCMLE_1, "FCMLE (zero)", "0111111011111000110110nnnnnddddd") -INST(FCMLE_2, "FCMLE (zero)", "011111101z100000110110nnnnnddddd") -//INST(FCVTPU_1, "FCVTPU (vector)", "0111111011111001101010nnnnnddddd") -INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd") -//INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd") -INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd") -INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") -INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar three same extra -//INST(SQRDMLAH_vec_1, "SQRDMLAH (vector)", "01111110zz0mmmmm100001nnnnnddddd") -//INST(SQRDMLAH_vec_2, "SQRDMLAH (vector)", "0Q101110zz0mmmmm100001nnnnnddddd") -//INST(SQRDMLSH_vec_1, "SQRDMLSH (vector)", "01111110zz0mmmmm100011nnnnnddddd") -//INST(SQRDMLSH_vec_2, "SQRDMLSH (vector)", "0Q101110zz0mmmmm100011nnnnnddddd") - -// Data Processing - FP and SIMD - Scalar two-register misc -INST(SUQADD_1, "SUQADD", "01011110zz100000001110nnnnnddddd") -INST(SQABS_1, "SQABS", "01011110zz100000011110nnnnnddddd") -INST(CMGT_zero_1, "CMGT (zero)", "01011110zz100000100010nnnnnddddd") -INST(CMEQ_zero_1, "CMEQ (zero)", "01011110zz100000100110nnnnnddddd") -INST(CMLT_1, "CMLT (zero)", "01011110zz100000101010nnnnnddddd") -INST(ABS_1, "ABS", "01011110zz100000101110nnnnnddddd") -INST(SQXTN_1, "SQXTN, SQXTN2", "01011110zz100001010010nnnnnddddd") -INST(USQADD_1, "USQADD", "01111110zz100000001110nnnnnddddd") -INST(SQNEG_1, "SQNEG", "01111110zz100000011110nnnnnddddd") -INST(CMGE_zero_1, "CMGE (zero)", "01111110zz100000100010nnnnnddddd") -INST(CMLE_1, "CMLE (zero)", "01111110zz100000100110nnnnnddddd") -INST(NEG_1, "NEG (vector)", "01111110zz100000101110nnnnnddddd") -INST(SQXTUN_1, 
"SQXTUN, SQXTUN2", "01111110zz100001001010nnnnnddddd") -INST(UQXTN_1, "UQXTN, UQXTN2", "01111110zz100001010010nnnnnddddd") -INST(FCVTXN_1, "FCVTXN, FCVTXN2", "011111100z100001011010nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar pairwise -INST(ADDP_pair, "ADDP (scalar)", "01011110zz110001101110nnnnnddddd") -//INST(FMAXNMP_pair_1, "FMAXNMP (scalar)", "0101111000110000110010nnnnnddddd") -INST(FMAXNMP_pair_2, "FMAXNMP (scalar)", "011111100z110000110010nnnnnddddd") -//INST(FADDP_pair_1, "FADDP (scalar)", "0101111000110000110110nnnnnddddd") -INST(FADDP_pair_2, "FADDP (scalar)", "011111100z110000110110nnnnnddddd") -//INST(FMAXP_pair_1, "FMAXP (scalar)", "0101111000110000111110nnnnnddddd") -INST(FMAXP_pair_2, "FMAXP (scalar)", "011111100z110000111110nnnnnddddd") -//INST(FMINNMP_pair_1, "FMINNMP (scalar)", "0101111010110000110010nnnnnddddd") -INST(FMINNMP_pair_2, "FMINNMP (scalar)", "011111101z110000110010nnnnnddddd") -//INST(FMINP_pair_1, "FMINP (scalar)", "0101111010110000111110nnnnnddddd") -INST(FMINP_pair_2, "FMINP (scalar)", "011111101z110000111110nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar three different -//INST(SQDMLAL_vec_1, "SQDMLAL, SQDMLAL2 (vector)", "01011110zz1mmmmm100100nnnnnddddd") -//INST(SQDMLSL_vec_1, "SQDMLSL, SQDMLSL2 (vector)", "01011110zz1mmmmm101100nnnnnddddd") -//INST(SQDMULL_vec_1, "SQDMULL, SQDMULL2 (vector)", "01011110zz1mmmmm110100nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar three same -INST(SQADD_1, "SQADD", "01011110zz1mmmmm000011nnnnnddddd") -INST(SQSUB_1, "SQSUB", "01011110zz1mmmmm001011nnnnnddddd") -INST(CMGT_reg_1, "CMGT (register)", "01011110zz1mmmmm001101nnnnnddddd") -INST(CMGE_reg_1, "CMGE (register)", "01011110zz1mmmmm001111nnnnnddddd") -INST(SSHL_1, "SSHL", "01011110zz1mmmmm010001nnnnnddddd") -INST(SQSHL_reg_1, "SQSHL (register)", "01011110zz1mmmmm010011nnnnnddddd") -INST(SRSHL_1, "SRSHL", "01011110zz1mmmmm010101nnnnnddddd") -//INST(SQRSHL_1, "SQRSHL", 
"01011110zz1mmmmm010111nnnnnddddd") -INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd") -INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd") -INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd") -INST(UQADD_1, "UQADD", "01111110zz1mmmmm000011nnnnnddddd") -INST(UQSUB_1, "UQSUB", "01111110zz1mmmmm001011nnnnnddddd") -INST(CMHI_1, "CMHI (register)", "01111110zz1mmmmm001101nnnnnddddd") -INST(CMHS_1, "CMHS (register)", "01111110zz1mmmmm001111nnnnnddddd") -INST(USHL_1, "USHL", "01111110zz1mmmmm010001nnnnnddddd") -INST(UQSHL_reg_1, "UQSHL (register)", "01111110zz1mmmmm010011nnnnnddddd") -INST(URSHL_1, "URSHL", "01111110zz1mmmmm010101nnnnnddddd") -//INST(UQRSHL_1, "UQRSHL", "01111110zz1mmmmm010111nnnnnddddd") -INST(SUB_1, "SUB (vector)", "01111110zz1mmmmm100001nnnnnddddd") -INST(CMEQ_reg_1, "CMEQ (register)", "01111110zz1mmmmm100011nnnnnddddd") -INST(SQRDMULH_vec_1, "SQRDMULH (vector)", "01111110zz1mmmmm101101nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar shift by immediate -INST(SSHR_1, "SSHR", "010111110IIIIiii000001nnnnnddddd") -INST(SSRA_1, "SSRA", "010111110IIIIiii000101nnnnnddddd") -INST(SRSHR_1, "SRSHR", "010111110IIIIiii001001nnnnnddddd") -INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd") -INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd") -INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd") -INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd") -//INST(SQRSHRN_1, "SQRSHRN, SQRSHRN2", "010111110IIIIiii100111nnnnnddddd") -INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd") -INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd") -INST(USHR_1, "USHR", "011111110IIIIiii000001nnnnnddddd") -INST(USRA_1, "USRA", "011111110IIIIiii000101nnnnnddddd") -INST(URSHR_1, "URSHR", "011111110IIIIiii001001nnnnnddddd") -INST(URSRA_1, "URSRA", "011111110IIIIiii001101nnnnnddddd") -INST(SRI_1, "SRI", 
"011111110IIIIiii010001nnnnnddddd") -INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd") -INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd") -INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd") -INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd") -//INST(SQRSHRUN_1, "SQRSHRUN, SQRSHRUN2", "011111110IIIIiii100011nnnnnddddd") -INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd") -//INST(UQRSHRN_1, "UQRSHRN, UQRSHRN2", "011111110IIIIiii100111nnnnnddddd") -INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd") -INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Scalar x indexed element -//INST(SQDMLAL_elt_1, "SQDMLAL, SQDMLAL2 (by element)", "01011111zzLMmmmm0011H0nnnnnddddd") -//INST(SQDMLSL_elt_1, "SQDMLSL, SQDMLSL2 (by element)", "01011111zzLMmmmm0111H0nnnnnddddd") -INST(SQDMULL_elt_1, "SQDMULL, SQDMULL2 (by element)", "01011111zzLMmmmm1011H0nnnnnddddd") -INST(SQDMULH_elt_1, "SQDMULH (by element)", "01011111zzLMmmmm1100H0nnnnnddddd") -INST(SQRDMULH_elt_1, "SQRDMULH (by element)", "01011111zzLMmmmm1101H0nnnnnddddd") -INST(FMLA_elt_1, "FMLA (by element)", "0101111100LMmmmm0001H0nnnnnddddd") -INST(FMLA_elt_2, "FMLA (by element)", "010111111zLMmmmm0001H0nnnnnddddd") -INST(FMLS_elt_1, "FMLS (by element)", "0101111100LMmmmm0101H0nnnnnddddd") -INST(FMLS_elt_2, "FMLS (by element)", "010111111zLMmmmm0101H0nnnnnddddd") -//INST(FMUL_elt_1, "FMUL (by element)", "0101111100LMmmmm1001H0nnnnnddddd") -INST(FMUL_elt_2, "FMUL (by element)", "010111111zLMmmmm1001H0nnnnnddddd") -//INST(SQRDMLAH_elt_1, "SQRDMLAH (by element)", "01111111zzLMmmmm1101H0nnnnnddddd") -//INST(SQRDMLSH_elt_1, "SQRDMLSH (by element)", "01111111zzLMmmmm1111H0nnnnnddddd") -//INST(FMULX_elt_1, "FMULX (by element)", "0111111100LMmmmm1001H0nnnnnddddd") -INST(FMULX_elt_2, "FMULX (by element)", "011111111zLMmmmm1001H0nnnnnddddd") - -// 
Data Processing - FP and SIMD - SIMD Table Lookup -INST(TBL, "TBL", "0Q001110000mmmmm0LL000nnnnnddddd") -INST(TBX, "TBX", "0Q001110000mmmmm0LL100nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Permute -INST(UZP1, "UZP1", "0Q001110zz0mmmmm000110nnnnnddddd") -INST(TRN1, "TRN1", "0Q001110zz0mmmmm001010nnnnnddddd") -INST(ZIP1, "ZIP1", "0Q001110zz0mmmmm001110nnnnnddddd") -INST(UZP2, "UZP2", "0Q001110zz0mmmmm010110nnnnnddddd") -INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd") -INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Extract -INST(EXT, "EXT", "0Q101110000mmmmm0iiii0nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Copy -INST(DUP_elt_2, "DUP (element)", "0Q001110000iiiii000001nnnnnddddd") -INST(DUP_gen, "DUP (general)", "0Q001110000iiiii000011nnnnnddddd") -INST(SMOV, "SMOV", "0Q001110000iiiii001011nnnnnddddd") -INST(UMOV, "UMOV", "0Q001110000iiiii001111nnnnnddddd") -INST(INS_gen, "INS (general)", "01001110000iiiii000111nnnnnddddd") -INST(INS_elt, "INS (element)", "01101110000iiiii0iiii1nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Three same -//INST(FMULX_vec_3, "FMULX", "0Q001110010mmmmm000111nnnnnddddd") -INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd") -INST(FRECPS_3, "FRECPS", "0Q001110010mmmmm001111nnnnnddddd") -INST(FRSQRTS_3, "FRSQRTS", "0Q001110110mmmmm001111nnnnnddddd") -//INST(FCMGE_reg_3, "FCMGE (register)", "0Q101110010mmmmm001001nnnnnddddd") -//INST(FACGE_3, "FACGE", "0Q101110010mmmmm001011nnnnnddddd") -//INST(FABD_3, "FABD", "0Q101110110mmmmm000101nnnnnddddd") -//INST(FCMGT_reg_3, "FCMGT (register)", "0Q101110110mmmmm001001nnnnnddddd") -//INST(FACGT_3, "FACGT", "0Q101110110mmmmm001011nnnnnddddd") -//INST(FMAXNM_1, "FMAXNM (vector)", "0Q001110010mmmmm000001nnnnnddddd") -INST(FMLA_vec_1, "FMLA (vector)", "0Q001110010mmmmm000011nnnnnddddd") -//INST(FADD_1, "FADD (vector)", "0Q001110010mmmmm000101nnnnnddddd") -//INST(FMAX_1, "FMAX (vector)", 
"0Q001110010mmmmm001101nnnnnddddd") -//INST(FMINNM_1, "FMINNM (vector)", "0Q001110110mmmmm000001nnnnnddddd") -INST(FMLS_vec_1, "FMLS (vector)", "0Q001110110mmmmm000011nnnnnddddd") -//INST(FSUB_1, "FSUB (vector)", "0Q001110110mmmmm000101nnnnnddddd") -//INST(FMIN_1, "FMIN (vector)", "0Q001110110mmmmm001101nnnnnddddd") -//INST(FMAXNMP_vec_1, "FMAXNMP (vector)", "0Q101110010mmmmm000001nnnnnddddd") -//INST(FADDP_vec_1, "FADDP (vector)", "0Q101110010mmmmm000101nnnnnddddd") -//INST(FMUL_vec_1, "FMUL (vector)", "0Q101110010mmmmm000111nnnnnddddd") -//INST(FMAXP_vec_1, "FMAXP (vector)", "0Q101110010mmmmm001101nnnnnddddd") -//INST(FDIV_1, "FDIV (vector)", "0Q101110010mmmmm001111nnnnnddddd") -//INST(FMINNMP_vec_1, "FMINNMP (vector)", "0Q101110110mmmmm000001nnnnnddddd") -//INST(FMINP_vec_1, "FMINP (vector)", "0Q101110110mmmmm001101nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Three same extra -//INST(SMMLA_vec, "SMMLA", "01001110100mmmmm101001nnnnnddddd") // v8.6 -//INST(UMMLA_vec, "UMMLA", "01101110100mmmmm101001nnnnnddddd") // v8.6 -//INST(USMMLA_vec, "USMMLA", "01001110100mmmmm101011nnnnnddddd") // v8.6 -//INST(SUDOT_element, "SUDOT (by element)", "0Q00111100LMmmmm1111H0nnnnnddddd") // v8.6 -//INST(USDOT_element, "USDOT (by_element)", "0Q00111110LMmmmm1111H0nnnnnddddd") // v8.6 -//INST(USDOT_vec, "USDOT (vector)", "0Q001110100mmmmm100111nnnnnddddd") // v8.6 -INST(SDOT_vec, "SDOT (vector)", "0Q001110zz0mmmmm100101nnnnnddddd") -INST(UDOT_vec, "UDOT (vector)", "0Q101110zz0mmmmm100101nnnnnddddd") -INST(FCMLA_vec, "FCMLA", "0Q101110zz0mmmmm110rr1nnnnnddddd") -INST(FCADD_vec, "FCADD", "0Q101110zz0mmmmm111r01nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD Two-register misc -INST(REV64_asimd, "REV64", "0Q001110zz100000000010nnnnnddddd") -INST(REV16_asimd, "REV16 (vector)", "0Q001110zz100000000110nnnnnddddd") -INST(SADDLP, "SADDLP", "0Q001110zz100000001010nnnnnddddd") -INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd") -INST(CLS_asimd, "CLS (vector)", 
"0Q001110zz100000010010nnnnnddddd") -INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd") -INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd") -INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd") -INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd") -INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd") -INST(CMLT_2, "CMLT (zero)", "0Q001110zz100000101010nnnnnddddd") -INST(ABS_2, "ABS", "0Q001110zz100000101110nnnnnddddd") -INST(XTN, "XTN, XTN2", "0Q001110zz100001001010nnnnnddddd") -INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd") -INST(FCVTN, "FCVTN, FCVTN2", "0Q0011100z100001011010nnnnnddddd") -INST(FCVTL, "FCVTL, FCVTL2", "0Q0011100z100001011110nnnnnddddd") -INST(FRINTN_1, "FRINTN (vector)", "0Q00111001111001100010nnnnnddddd") -INST(FRINTN_2, "FRINTN (vector)", "0Q0011100z100001100010nnnnnddddd") -INST(FRINTM_1, "FRINTM (vector)", "0Q00111001111001100110nnnnnddddd") -INST(FRINTM_2, "FRINTM (vector)", "0Q0011100z100001100110nnnnnddddd") -//INST(FCVTNS_3, "FCVTNS (vector)", "0Q00111001111001101010nnnnnddddd") -INST(FCVTNS_4, "FCVTNS (vector)", "0Q0011100z100001101010nnnnnddddd") -//INST(FCVTMS_3, "FCVTMS (vector)", "0Q00111001111001101110nnnnnddddd") -INST(FCVTMS_4, "FCVTMS (vector)", "0Q0011100z100001101110nnnnnddddd") -//INST(FCVTAS_3, "FCVTAS (vector)", "0Q00111001111001110010nnnnnddddd") -INST(FCVTAS_4, "FCVTAS (vector)", "0Q0011100z100001110010nnnnnddddd") -//INST(SCVTF_int_3, "SCVTF (vector, integer)", "0Q00111001111001110110nnnnnddddd") -INST(SCVTF_int_4, "SCVTF (vector, integer)", "0Q0011100z100001110110nnnnnddddd") -//INST(FCMGT_zero_3, "FCMGT (zero)", "0Q00111011111000110010nnnnnddddd") -INST(FCMGT_zero_4, "FCMGT (zero)", "0Q0011101z100000110010nnnnnddddd") -INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd") -INST(FCMEQ_zero_4, "FCMEQ (zero)", "0Q0011101z100000110110nnnnnddddd") -//INST(FCMLT_3, "FCMLT (zero)", "0Q00111011111000111010nnnnnddddd") -INST(FCMLT_4, "FCMLT 
(zero)", "0Q0011101z100000111010nnnnnddddd") -INST(FABS_1, "FABS (vector)", "0Q00111011111000111110nnnnnddddd") -INST(FABS_2, "FABS (vector)", "0Q0011101z100000111110nnnnnddddd") -INST(FRINTP_1, "FRINTP (vector)", "0Q00111011111001100010nnnnnddddd") -INST(FRINTP_2, "FRINTP (vector)", "0Q0011101z100001100010nnnnnddddd") -INST(FRINTZ_1, "FRINTZ (vector)", "0Q00111011111001100110nnnnnddddd") -INST(FRINTZ_2, "FRINTZ (vector)", "0Q0011101z100001100110nnnnnddddd") -//INST(FCVTPS_3, "FCVTPS (vector)", "0Q00111011111001101010nnnnnddddd") -INST(FCVTPS_4, "FCVTPS (vector)", "0Q0011101z100001101010nnnnnddddd") -//INST(FCVTZS_int_3, "FCVTZS (vector, integer)", "0Q00111011111001101110nnnnnddddd") -INST(FCVTZS_int_4, "FCVTZS (vector, integer)", "0Q0011101z100001101110nnnnnddddd") -INST(URECPE, "URECPE", "0Q0011101z100001110010nnnnnddddd") -INST(FRECPE_3, "FRECPE", "0Q00111011111001110110nnnnnddddd") -INST(FRECPE_4, "FRECPE", "0Q0011101z100001110110nnnnnddddd") -INST(REV32_asimd, "REV32 (vector)", "0Q101110zz100000000010nnnnnddddd") -INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd") -INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd") -INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd") -INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd") -INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd") -INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd") -INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd") -INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd") -INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd") -INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd") -INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd") -INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd") -INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd") -INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd") -INST(FRINTX_1, "FRINTX (vector)", 
"0Q10111001111001100110nnnnnddddd") -INST(FRINTX_2, "FRINTX (vector)", "0Q1011100z100001100110nnnnnddddd") -//INST(FCVTNU_3, "FCVTNU (vector)", "0Q10111001111001101010nnnnnddddd") -INST(FCVTNU_4, "FCVTNU (vector)", "0Q1011100z100001101010nnnnnddddd") -//INST(FCVTMU_3, "FCVTMU (vector)", "0Q10111001111001101110nnnnnddddd") -INST(FCVTMU_4, "FCVTMU (vector)", "0Q1011100z100001101110nnnnnddddd") -//INST(FCVTAU_3, "FCVTAU (vector)", "0Q10111001111001110010nnnnnddddd") -INST(FCVTAU_4, "FCVTAU (vector)", "0Q1011100z100001110010nnnnnddddd") -//INST(UCVTF_int_3, "UCVTF (vector, integer)", "0Q10111001111001110110nnnnnddddd") -INST(UCVTF_int_4, "UCVTF (vector, integer)", "0Q1011100z100001110110nnnnnddddd") -INST(NOT, "NOT", "0Q10111000100000010110nnnnnddddd") -INST(RBIT_asimd, "RBIT (vector)", "0Q10111001100000010110nnnnnddddd") -INST(FNEG_1, "FNEG (vector)", "0Q10111011111000111110nnnnnddddd") -INST(FNEG_2, "FNEG (vector)", "0Q1011101z100000111110nnnnnddddd") -INST(FRINTI_1, "FRINTI (vector)", "0Q10111011111001100110nnnnnddddd") -INST(FRINTI_2, "FRINTI (vector)", "0Q1011101z100001100110nnnnnddddd") -//INST(FCMGE_zero_3, "FCMGE (zero)", "0Q10111011111000110010nnnnnddddd") -INST(FCMGE_zero_4, "FCMGE (zero)", "0Q1011101z100000110010nnnnnddddd") -//INST(FCMLE_3, "FCMLE (zero)", "0Q10111011111000110110nnnnnddddd") -INST(FCMLE_4, "FCMLE (zero)", "0Q1011101z100000110110nnnnnddddd") -//INST(FCVTPU_3, "FCVTPU (vector)", "0Q10111011111001101010nnnnnddddd") -INST(FCVTPU_4, "FCVTPU (vector)", "0Q1011101z100001101010nnnnnddddd") -//INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd") -INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") -INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") -INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") -INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") -//INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd") -INST(FSQRT_2, "FSQRT (vector)", 
"0Q1011101z100001111110nnnnnddddd") -//INST(FRINT32X_1, "FRINT32X (vector)", "0Q1011100z100001111110nnnnnddddd") // ARMv8.5 -//INST(FRINT64X_1, "FRINT64X (vector)", "0Q1011100z100001111010nnnnnddddd") // ARMv8.5 -//INST(FRINT32Z_1, "FRINT32Z (vector)", "0Q0011100z100001111010nnnnnddddd") // ARMv8.5 -//INST(FRINT64Z_1, "FRINT64Z (vector)", "0Q0011100z100001111110nnnnnddddd") // ARMv8.5 - -// Data Processing - FP and SIMD - SIMD across lanes -INST(SADDLV, "SADDLV", "0Q001110zz110000001110nnnnnddddd") -INST(SMAXV, "SMAXV", "0Q001110zz110000101010nnnnnddddd") -INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") -INST(ADDV, "ADDV", "0Q001110zz110001101110nnnnnddddd") -//INST(FMAXNMV_1, "FMAXNMV", "0Q00111000110000110010nnnnnddddd") -INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") -//INST(FMAXV_1, "FMAXV", "0Q00111000110000111110nnnnnddddd") -INST(FMAXV_2, "FMAXV", "0Q1011100z110000111110nnnnnddddd") -//INST(FMINNMV_1, "FMINNMV", "0Q00111010110000110010nnnnnddddd") -INST(FMINNMV_2, "FMINNMV", "0Q1011101z110000110010nnnnnddddd") -//INST(FMINV_1, "FMINV", "0Q00111010110000111110nnnnnddddd") -INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") -INST(UADDLV, "UADDLV", "0Q101110zz110000001110nnnnnddddd") -INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") -INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD three different -INST(SADDL, "SADDL, SADDL2", "0Q001110zz1mmmmm000000nnnnnddddd") -INST(SADDW, "SADDW, SADDW2", "0Q001110zz1mmmmm000100nnnnnddddd") -INST(SSUBL, "SSUBL, SSUBL2", "0Q001110zz1mmmmm001000nnnnnddddd") -INST(SSUBW, "SSUBW, SSUBW2", "0Q001110zz1mmmmm001100nnnnnddddd") -INST(ADDHN, "ADDHN, ADDHN2", "0Q001110zz1mmmmm010000nnnnnddddd") -INST(SABAL, "SABAL, SABAL2", "0Q001110zz1mmmmm010100nnnnnddddd") -INST(SUBHN, "SUBHN, SUBHN2", "0Q001110zz1mmmmm011000nnnnnddddd") -INST(SABDL, "SABDL, SABDL2", "0Q001110zz1mmmmm011100nnnnnddddd") -INST(SMLAL_vec, "SMLAL, SMLAL2 (vector)", 
"0Q001110zz1mmmmm100000nnnnnddddd") -INST(SMLSL_vec, "SMLSL, SMLSL2 (vector)", "0Q001110zz1mmmmm101000nnnnnddddd") -INST(SMULL_vec, "SMULL, SMULL2 (vector)", "0Q001110zz1mmmmm110000nnnnnddddd") -INST(PMULL, "PMULL, PMULL2", "0Q001110zz1mmmmm111000nnnnnddddd") -INST(UADDL, "UADDL, UADDL2", "0Q101110zz1mmmmm000000nnnnnddddd") -INST(UADDW, "UADDW, UADDW2", "0Q101110zz1mmmmm000100nnnnnddddd") -INST(USUBL, "USUBL, USUBL2", "0Q101110zz1mmmmm001000nnnnnddddd") -INST(USUBW, "USUBW, USUBW2", "0Q101110zz1mmmmm001100nnnnnddddd") -INST(RADDHN, "RADDHN, RADDHN2", "0Q101110zz1mmmmm010000nnnnnddddd") -INST(UABAL, "UABAL, UABAL2", "0Q101110zz1mmmmm010100nnnnnddddd") -INST(RSUBHN, "RSUBHN, RSUBHN2", "0Q101110zz1mmmmm011000nnnnnddddd") -INST(UABDL, "UABDL, UABDL2", "0Q101110zz1mmmmm011100nnnnnddddd") -INST(UMLAL_vec, "UMLAL, UMLAL2 (vector)", "0Q101110zz1mmmmm100000nnnnnddddd") -INST(UMLSL_vec, "UMLSL, UMLSL2 (vector)", "0Q101110zz1mmmmm101000nnnnnddddd") -INST(UMULL_vec, "UMULL, UMULL2 (vector)", "0Q101110zz1mmmmm110000nnnnnddddd") -//INST(SQDMLAL_vec_2, "SQDMLAL, SQDMLAL2 (vector)", "0Q001110zz1mmmmm100100nnnnnddddd") -//INST(SQDMLSL_vec_2, "SQDMLSL, SQDMLSL2 (vector)", "0Q001110zz1mmmmm101100nnnnnddddd") -INST(SQDMULL_vec_2, "SQDMULL, SQDMULL2 (vector)", "0Q001110zz1mmmmm110100nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD three same -INST(SHADD, "SHADD", "0Q001110zz1mmmmm000001nnnnnddddd") -INST(SQADD_2, "SQADD", "0Q001110zz1mmmmm000011nnnnnddddd") -INST(SRHADD, "SRHADD", "0Q001110zz1mmmmm000101nnnnnddddd") -INST(SHSUB, "SHSUB", "0Q001110zz1mmmmm001001nnnnnddddd") -INST(SQSUB_2, "SQSUB", "0Q001110zz1mmmmm001011nnnnnddddd") -INST(CMGT_reg_2, "CMGT (register)", "0Q001110zz1mmmmm001101nnnnnddddd") -INST(CMGE_reg_2, "CMGE (register)", "0Q001110zz1mmmmm001111nnnnnddddd") -INST(SSHL_2, "SSHL", "0Q001110zz1mmmmm010001nnnnnddddd") -INST(SQSHL_reg_2, "SQSHL (register)", "0Q001110zz1mmmmm010011nnnnnddddd") -INST(SRSHL_2, "SRSHL", "0Q001110zz1mmmmm010101nnnnnddddd") 
-//INST(SQRSHL_2, "SQRSHL", "0Q001110zz1mmmmm010111nnnnnddddd") -INST(SMAX, "SMAX", "0Q001110zz1mmmmm011001nnnnnddddd") -INST(SMIN, "SMIN", "0Q001110zz1mmmmm011011nnnnnddddd") -INST(SABD, "SABD", "0Q001110zz1mmmmm011101nnnnnddddd") -INST(SABA, "SABA", "0Q001110zz1mmmmm011111nnnnnddddd") -INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd") -INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd") -INST(MLA_vec, "MLA (vector)", "0Q001110zz1mmmmm100101nnnnnddddd") -INST(MUL_vec, "MUL (vector)", "0Q001110zz1mmmmm100111nnnnnddddd") -INST(SMAXP, "SMAXP", "0Q001110zz1mmmmm101001nnnnnddddd") -INST(SMINP, "SMINP", "0Q001110zz1mmmmm101011nnnnnddddd") -INST(SQDMULH_vec_2, "SQDMULH (vector)", "0Q001110zz1mmmmm101101nnnnnddddd") -INST(ADDP_vec, "ADDP (vector)", "0Q001110zz1mmmmm101111nnnnnddddd") -INST(FMAXNM_2, "FMAXNM (vector)", "0Q0011100z1mmmmm110001nnnnnddddd") -INST(FMLA_vec_2, "FMLA (vector)", "0Q0011100z1mmmmm110011nnnnnddddd") -INST(FADD_2, "FADD (vector)", "0Q0011100z1mmmmm110101nnnnnddddd") -INST(FMAX_2, "FMAX (vector)", "0Q0011100z1mmmmm111101nnnnnddddd") -INST(FMULX_vec_4, "FMULX", "0Q0011100z1mmmmm110111nnnnnddddd") -INST(FCMEQ_reg_4, "FCMEQ (register)", "0Q0011100z1mmmmm111001nnnnnddddd") -//INST(FMLAL_vec_1, "FMLAL, FMLAL2 (vector)", "0Q0011100z1mmmmm111011nnnnnddddd") -INST(FRECPS_4, "FRECPS", "0Q0011100z1mmmmm111111nnnnnddddd") -INST(AND_asimd, "AND (vector)", "0Q001110001mmmmm000111nnnnnddddd") -INST(BIC_asimd_reg, "BIC (vector, register)", "0Q001110011mmmmm000111nnnnnddddd") -INST(FMINNM_2, "FMINNM (vector)", "0Q0011101z1mmmmm110001nnnnnddddd") -INST(FMLS_vec_2, "FMLS (vector)", "0Q0011101z1mmmmm110011nnnnnddddd") -INST(FSUB_2, "FSUB (vector)", "0Q0011101z1mmmmm110101nnnnnddddd") -//INST(FMLSL_vec_1, "FMLSL, FMLSL2 (vector)", "0Q0011101z1mmmmm111011nnnnnddddd") -INST(FMIN_2, "FMIN (vector)", "0Q0011101z1mmmmm111101nnnnnddddd") -INST(FRSQRTS_4, "FRSQRTS", "0Q0011101z1mmmmm111111nnnnnddddd") -INST(ORR_asimd_reg, "ORR (vector, register)", 
"0Q001110101mmmmm000111nnnnnddddd") -INST(ORN_asimd, "ORN (vector)", "0Q001110111mmmmm000111nnnnnddddd") -INST(UHADD, "UHADD", "0Q101110zz1mmmmm000001nnnnnddddd") -INST(UQADD_2, "UQADD", "0Q101110zz1mmmmm000011nnnnnddddd") -INST(URHADD, "URHADD", "0Q101110zz1mmmmm000101nnnnnddddd") -INST(UHSUB, "UHSUB", "0Q101110zz1mmmmm001001nnnnnddddd") -INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd") -INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd") -INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd") -INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd") -INST(UQSHL_reg_2, "UQSHL (register)", "0Q101110zz1mmmmm010011nnnnnddddd") -INST(URSHL_2, "URSHL", "0Q101110zz1mmmmm010101nnnnnddddd") -//INST(UQRSHL_2, "UQRSHL", "0Q101110zz1mmmmm010111nnnnnddddd") -INST(UMAX, "UMAX", "0Q101110zz1mmmmm011001nnnnnddddd") -INST(UMIN, "UMIN", "0Q101110zz1mmmmm011011nnnnnddddd") -INST(UABD, "UABD", "0Q101110zz1mmmmm011101nnnnnddddd") -INST(UABA, "UABA", "0Q101110zz1mmmmm011111nnnnnddddd") -INST(SUB_2, "SUB (vector)", "0Q101110zz1mmmmm100001nnnnnddddd") -INST(CMEQ_reg_2, "CMEQ (register)", "0Q101110zz1mmmmm100011nnnnnddddd") -INST(MLS_vec, "MLS (vector)", "0Q101110zz1mmmmm100101nnnnnddddd") -INST(PMUL, "PMUL", "0Q101110zz1mmmmm100111nnnnnddddd") -INST(UMAXP, "UMAXP", "0Q101110zz1mmmmm101001nnnnnddddd") -INST(UMINP, "UMINP", "0Q101110zz1mmmmm101011nnnnnddddd") -INST(SQRDMULH_vec_2, "SQRDMULH (vector)", "0Q101110zz1mmmmm101101nnnnnddddd") -INST(FMAXNMP_vec_2, "FMAXNMP (vector)", "0Q1011100z1mmmmm110001nnnnnddddd") -//INST(FMLAL_vec_2, "FMLAL, FMLAL2 (vector)", "0Q1011100z1mmmmm110011nnnnnddddd") -INST(FADDP_vec_2, "FADDP (vector)", "0Q1011100z1mmmmm110101nnnnnddddd") -INST(FMUL_vec_2, "FMUL (vector)", "0Q1011100z1mmmmm110111nnnnnddddd") -INST(FCMGE_reg_4, "FCMGE (register)", "0Q1011100z1mmmmm111001nnnnnddddd") -INST(FACGE_4, "FACGE", "0Q1011100z1mmmmm111011nnnnnddddd") -INST(FMAXP_vec_2, "FMAXP (vector)", "0Q1011100z1mmmmm111101nnnnnddddd") 
-INST(FDIV_2, "FDIV (vector)", "0Q1011100z1mmmmm111111nnnnnddddd") -INST(EOR_asimd, "EOR (vector)", "0Q101110001mmmmm000111nnnnnddddd") -INST(BSL, "BSL", "0Q101110011mmmmm000111nnnnnddddd") -INST(FMINNMP_vec_2, "FMINNMP (vector)", "0Q1011101z1mmmmm110001nnnnnddddd") -//INST(FMLSL_vec_2, "FMLSL, FMLSL2 (vector)", "0Q1011101z1mmmmm110011nnnnnddddd") -INST(FABD_4, "FABD", "0Q1011101z1mmmmm110101nnnnnddddd") -INST(FCMGT_reg_4, "FCMGT (register)", "0Q1011101z1mmmmm111001nnnnnddddd") -INST(FACGT_4, "FACGT", "0Q1011101z1mmmmm111011nnnnnddddd") -INST(FMINP_vec_2, "FMINP (vector)", "0Q1011101z1mmmmm111101nnnnnddddd") -INST(BIT, "BIT", "0Q101110101mmmmm000111nnnnnddddd") -INST(BIF, "BIF", "0Q101110111mmmmm000111nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD modified immediate -INST(MOVI, "MOVI, MVNI, ORR, BIC (vector, immediate)", "0Qo0111100000abcmmmm01defghddddd") -INST(FMOV_2, "FMOV (vector, immediate)", "0Qo0111100000abc111101defghddddd") -INST(FMOV_3, "FMOV (vector, immediate)", "0Q00111100000abc111111defghddddd") -INST(UnallocatedEncoding, "Unallocated SIMD modified immediate", "0--0111100000-------11----------") - -// Data Processing - FP and SIMD - SIMD Shift by immediate -INST(SSHR_2, "SSHR", "0Q0011110IIIIiii000001nnnnnddddd") -INST(SSRA_2, "SSRA", "0Q0011110IIIIiii000101nnnnnddddd") -INST(SRSHR_2, "SRSHR", "0Q0011110IIIIiii001001nnnnnddddd") -INST(SRSRA_2, "SRSRA", "0Q0011110IIIIiii001101nnnnnddddd") -INST(SHL_2, "SHL", "0Q0011110IIIIiii010101nnnnnddddd") -INST(SQSHL_imm_2, "SQSHL (immediate)", "0Q0011110IIIIiii011101nnnnnddddd") -INST(SHRN, "SHRN, SHRN2", "0Q0011110IIIIiii100001nnnnnddddd") -INST(RSHRN, "RSHRN, RSHRN2", "0Q0011110IIIIiii100011nnnnnddddd") -INST(SQSHRN_2, "SQSHRN, SQSHRN2", "0Q0011110IIIIiii100101nnnnnddddd") -INST(SQRSHRN_2, "SQRSHRN, SQRSHRN2", "0Q0011110IIIIiii100111nnnnnddddd") -INST(SSHLL, "SSHLL, SSHLL2", "0Q0011110IIIIiii101001nnnnnddddd") -INST(SCVTF_fix_2, "SCVTF (vector, fixed-point)", "0Q0011110IIIIiii111001nnnnnddddd") 
-INST(FCVTZS_fix_2, "FCVTZS (vector, fixed-point)", "0Q0011110IIIIiii111111nnnnnddddd") -INST(USHR_2, "USHR", "0Q1011110IIIIiii000001nnnnnddddd") -INST(USRA_2, "USRA", "0Q1011110IIIIiii000101nnnnnddddd") -INST(URSHR_2, "URSHR", "0Q1011110IIIIiii001001nnnnnddddd") -INST(URSRA_2, "URSRA", "0Q1011110IIIIiii001101nnnnnddddd") -INST(SRI_2, "SRI", "0Q1011110IIIIiii010001nnnnnddddd") -INST(SLI_2, "SLI", "0Q1011110IIIIiii010101nnnnnddddd") -INST(SQSHLU_2, "SQSHLU", "0Q1011110IIIIiii011001nnnnnddddd") -INST(UQSHL_imm_2, "UQSHL (immediate)", "0Q1011110IIIIiii011101nnnnnddddd") -INST(SQSHRUN_2, "SQSHRUN, SQSHRUN2", "0Q1011110IIIIiii100001nnnnnddddd") -INST(SQRSHRUN_2, "SQRSHRUN, SQRSHRUN2", "0Q1011110IIIIiii100011nnnnnddddd") -INST(UQSHRN_2, "UQSHRN, UQSHRN2", "0Q1011110IIIIiii100101nnnnnddddd") -INST(UQRSHRN_2, "UQRSHRN, UQRSHRN2", "0Q1011110IIIIiii100111nnnnnddddd") -INST(USHLL, "USHLL, USHLL2", "0Q1011110IIIIiii101001nnnnnddddd") -INST(UCVTF_fix_2, "UCVTF (vector, fixed-point)", "0Q1011110IIIIiii111001nnnnnddddd") -INST(FCVTZU_fix_2, "FCVTZU (vector, fixed-point)", "0Q1011110IIIIiii111111nnnnnddddd") - -// Data Processing - FP and SIMD - SIMD vector x indexed element -INST(SMLAL_elt, "SMLAL, SMLAL2 (by element)", "0Q001111zzLMmmmm0010H0nnnnnddddd") -//INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd") -INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") -//INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd") -INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") -INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd") -INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd") -INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd") -INST(SQRDMULH_elt_2, "SQRDMULH (by element)", "0Q001111zzLMmmmm1101H0nnnnnddddd") -INST(SDOT_elt, "SDOT (by element)", 
"0Q001111zzLMmmmm1110H0nnnnnddddd") -INST(FMLA_elt_3, "FMLA (by element)", "0Q00111100LMmmmm0001H0nnnnnddddd") -INST(FMLA_elt_4, "FMLA (by element)", "0Q0011111zLMmmmm0001H0nnnnnddddd") -INST(FMLS_elt_3, "FMLS (by element)", "0Q00111100LMmmmm0101H0nnnnnddddd") -INST(FMLS_elt_4, "FMLS (by element)", "0Q0011111zLMmmmm0101H0nnnnnddddd") -//INST(FMUL_elt_3, "FMUL (by element)", "0Q00111100LMmmmm1001H0nnnnnddddd") -INST(FMUL_elt_4, "FMUL (by element)", "0Q0011111zLMmmmm1001H0nnnnnddddd") -//INST(FMLAL_elt_1, "FMLAL, FMLAL2 (by element)", "0Q0011111zLMmmmm0000H0nnnnnddddd") -//INST(FMLAL_elt_2, "FMLAL, FMLAL2 (by element)", "0Q1011111zLMmmmm1000H0nnnnnddddd") -//INST(FMLSL_elt_1, "FMLSL, FMLSL2 (by element)", "0Q0011111zLMmmmm0100H0nnnnnddddd") -//INST(FMLSL_elt_2, "FMLSL, FMLSL2 (by element)", "0Q1011111zLMmmmm1100H0nnnnnddddd") -INST(MLA_elt, "MLA (by element)", "0Q101111zzLMmmmm0000H0nnnnnddddd") -INST(UMLAL_elt, "UMLAL, UMLAL2 (by element)", "0Q101111zzLMmmmm0010H0nnnnnddddd") -INST(MLS_elt, "MLS (by element)", "0Q101111zzLMmmmm0100H0nnnnnddddd") -INST(UMLSL_elt, "UMLSL, UMLSL2 (by element)", "0Q101111zzLMmmmm0110H0nnnnnddddd") -INST(UMULL_elt, "UMULL, UMULL2 (by element)", "0Q101111zzLMmmmm1010H0nnnnnddddd") -//INST(SQRDMLAH_elt_2, "SQRDMLAH (by element)", "0Q101111zzLMmmmm1101H0nnnnnddddd") -INST(UDOT_elt, "UDOT (by element)", "0Q101111zzLMmmmm1110H0nnnnnddddd") -//INST(SQRDMLSH_elt_2, "SQRDMLSH (by element)", "0Q101111zzLMmmmm1111H0nnnnnddddd") -//INST(FMULX_elt_3, "FMULX (by element)", "0Q10111100LMmmmm1001H0nnnnnddddd") -INST(FMULX_elt_4, "FMULX (by element)", "0Q1011111zLMmmmm1001H0nnnnnddddd") -INST(FCMLA_elt, "FCMLA (by element)", "0Q101111zzLMmmmm0rr1H0nnnnnddddd") - -// Data Processing - FP and SIMD - Cryptographic three register -INST(SM3TT1A, "SM3TT1A", "11001110010mmmmm10ii00nnnnnddddd") -INST(SM3TT1B, "SM3TT1B", "11001110010mmmmm10ii01nnnnnddddd") -INST(SM3TT2A, "SM3TT2A", "11001110010mmmmm10ii10nnnnnddddd") -INST(SM3TT2B, "SM3TT2B", 
"11001110010mmmmm10ii11nnnnnddddd") - -// Data Processing - FP and SIMD - SHA512 three register -INST(SHA512H, "SHA512H", "11001110011mmmmm100000nnnnnddddd") -INST(SHA512H2, "SHA512H2", "11001110011mmmmm100001nnnnnddddd") -INST(SHA512SU1, "SHA512SU1", "11001110011mmmmm100010nnnnnddddd") -INST(RAX1, "RAX1", "11001110011mmmmm100011nnnnnddddd") -INST(SM3PARTW1, "SM3PARTW1", "11001110011mmmmm110000nnnnnddddd") -INST(SM3PARTW2, "SM3PARTW2", "11001110011mmmmm110001nnnnnddddd") -INST(SM4EKEY, "SM4EKEY", "11001110011mmmmm110010nnnnnddddd") -INST(XAR, "XAR", "11001110100mmmmmiiiiiinnnnnddddd") - -// Data Processing - FP and SIMD - Cryptographic four register -INST(EOR3, "EOR3", "11001110000mmmmm0aaaaannnnnddddd") -INST(BCAX, "BCAX", "11001110001mmmmm0aaaaannnnnddddd") -INST(SM3SS1, "SM3SS1", "11001110010mmmmm0aaaaannnnnddddd") - -// Data Processing - FP and SIMD - SHA512 two register -INST(SHA512SU0, "SHA512SU0", "1100111011000000100000nnnnnddddd") -INST(SM4E, "SM4E", "1100111011000000100001nnnnnddddd") - -// Data Processing - FP and SIMD - Conversion between floating point and fixed point -INST(SCVTF_float_fix, "SCVTF (scalar, fixed-point)", "z0011110yy000010ppppppnnnnnddddd") -INST(UCVTF_float_fix, "UCVTF (scalar, fixed-point)", "z0011110yy000011ppppppnnnnnddddd") -INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011110yy011000ppppppnnnnnddddd") -INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011110yy011001ppppppnnnnnddddd") - -// Data Processing - FP and SIMD - Conversion between floating point and integer -INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") -INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") -INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") -INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") -INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") -INST(FCVTAU_float, "FCVTAU (scalar)", 
"z0011110yy100101000000nnnnnddddd") -INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") -INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") -INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") -INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") -INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") -INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") -INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") -//INST(FJCVTZS, "FJCVTZS", "0001111001111110000000nnnnnddddd") - -// Data Processing - FP and SIMD - Floating point data processing -INST(FMOV_float, "FMOV (register)", "00011110yy100000010000nnnnnddddd") -INST(FABS_float, "FABS (scalar)", "00011110yy100000110000nnnnnddddd") -INST(FNEG_float, "FNEG (scalar)", "00011110yy100001010000nnnnnddddd") -INST(FSQRT_float, "FSQRT (scalar)", "00011110yy100001110000nnnnnddddd") -INST(FCVT_float, "FCVT", "00011110yy10001oo10000nnnnnddddd") -INST(FRINTN_float, "FRINTN (scalar)", "00011110yy100100010000nnnnnddddd") -INST(FRINTP_float, "FRINTP (scalar)", "00011110yy100100110000nnnnnddddd") -INST(FRINTM_float, "FRINTM (scalar)", "00011110yy100101010000nnnnnddddd") -INST(FRINTZ_float, "FRINTZ (scalar)", "00011110yy100101110000nnnnnddddd") -INST(FRINTA_float, "FRINTA (scalar)", "00011110yy100110010000nnnnnddddd") -INST(FRINTX_float, "FRINTX (scalar)", "00011110yy100111010000nnnnnddddd") -INST(FRINTI_float, "FRINTI (scalar)", "00011110yy100111110000nnnnnddddd") -//INST(FRINT32X_float, "FRINT32X (scalar)", "00011110yy101000110000nnnnnddddd") // ARMv8.5 -//INST(FRINT64X_float, "FRINT64X (scalar)", "00011110yy101001110000nnnnnddddd") // ARMv8.5 -//INST(FRINT32Z_float, "FRINT32Z (scalar)", "00011110yy101000010000nnnnnddddd") // ARMv8.5 -//INST(FRINT64Z_float, "FRINT64Z (scalar)", "00011110yy101001010000nnnnnddddd") // ARMv8.5 - -// Data Processing - FP and SIMD 
- Floating point compare -INST(FCMP_float, "FCMP", "00011110yy1mmmmm001000nnnnn0o000") -INST(FCMPE_float, "FCMPE", "00011110yy1mmmmm001000nnnnn1o000") - -// Data Processing - FP and SIMD - Floating point immediate -INST(FMOV_float_imm, "FMOV (scalar, immediate)", "00011110yy1iiiiiiii10000000ddddd") - -// Data Processing - FP and SIMD - Floating point conditional compare -INST(FCCMP_float, "FCCMP", "00011110yy1mmmmmcccc01nnnnn0ffff") -INST(FCCMPE_float, "FCCMPE", "00011110yy1mmmmmcccc01nnnnn1ffff") - -// Data Processing - FP and SIMD - Floating point data processing two register -INST(FMUL_float, "FMUL (scalar)", "00011110yy1mmmmm000010nnnnnddddd") -INST(FDIV_float, "FDIV (scalar)", "00011110yy1mmmmm000110nnnnnddddd") -INST(FADD_float, "FADD (scalar)", "00011110yy1mmmmm001010nnnnnddddd") -INST(FSUB_float, "FSUB (scalar)", "00011110yy1mmmmm001110nnnnnddddd") -INST(FMAX_float, "FMAX (scalar)", "00011110yy1mmmmm010010nnnnnddddd") -INST(FMIN_float, "FMIN (scalar)", "00011110yy1mmmmm010110nnnnnddddd") -INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd") -INST(FMINNM_float, "FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd") -INST(FNMUL_float, "FNMUL (scalar)", "00011110yy1mmmmm100010nnnnnddddd") - -// Data Processing - FP and SIMD - Floating point conditional select -INST(FCSEL_float, "FCSEL", "00011110yy1mmmmmcccc11nnnnnddddd") - -// Data Processing - FP and SIMD - Floating point data processing three register -INST(FMADD_float, "FMADD", "00011111yy0mmmmm0aaaaannnnnddddd") -INST(FMSUB_float, "FMSUB", "00011111yy0mmmmm1aaaaannnnnddddd") -INST(FNMADD_float, "FNMADD", "00011111yy1mmmmm0aaaaannnnnddddd") -INST(FNMSUB_float, "FNMSUB", "00011111yy1mmmmm1aaaaannnnnddddd") - -// BFloat16 -//INST(BFCVT, "BFCVT", "0001111001100011010000nnnnnddddd") // v8.6 -//INST(BFCVTN, "BFCVTN{2}", "0Q00111010100001011010nnnnnddddd") // v8.6 -//INST(BFDOT_element, "BFDOT (by element)", "0Q00111101LMmmmm1111H0nnnnnddddd") // v8.6 -//INST(BFDOT_vec, "BFDOT (vector)", 
"0Q101110010mmmmm111111nnnnnddddd") // v8.6 -//INST(BFMLALX_element, "BFMLALX (by element)", "0Q00111111LMmmmm1111H0nnnnnddddd") // v8.6 -//INST(BFMLALX_vector, "BFMLALX (vector)", "0Q101110110mmmmm111111nnnnnddddd") // v8.6 -//INST(BFMMLA, "BFMMLA", "01101110010mmmmm111011nnnnnddddd") // v8.6 +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// DO NOT REORDER + +INST(FMOV_3, "FMOV (vector, immediate)", "0Q00111100000abc111111defghddddd") +INST(FMOV_2, "FMOV (vector, immediate)", "0Qo0111100000abc111101defghddddd") +INST(MOVI, "MOVI, MVNI, ORR, BIC (vector, immediate)", "0Qo0111100000abcmmmm01defghddddd") +INST(UnallocatedEncoding, "Unallocated SIMD modified immediate", "0--0111100000-------11----------") +INST(NOP, "NOP", "11010101000000110010000000011111") +INST(YIELD, "YIELD", "11010101000000110010000000111111") +INST(WFE, "WFE", "11010101000000110010000001011111") +INST(WFI, "WFI", "11010101000000110010000001111111") +INST(SEV, "SEV", "11010101000000110010000010011111") +INST(SEVL, "SEVL", "11010101000000110010000010111111") +INST(CFINV, "CFINV", "11010101000000000100000000011111") +INST(XAFlag, "XAFlag", "11010101000000000100000000111111") +INST(AXFlag, "AXFlag", "11010101000000000100000001011111") +INST(IC_IALLU, "IC IALLU", "11010101000010000111010100011111") +INST(IC_IALLUIS, "IC IALLUIS", "11010101000010000111000100011111") +INST(CLREX, "CLREX", "11010101000000110011MMMM01011111") +INST(DSB, "DSB", "11010101000000110011MMMM10011111") +INST(DMB, "DMB", "11010101000000110011MMMM10111111") +INST(ISB, "ISB", "11010101000000110011MMMM11011111") +INST(DC_IVAC, "DC IVAC", "110101010000100001110110001ttttt") +INST(DC_ISW, "DC ISW", "110101010000100001110110010ttttt") +INST(DC_CSW, "DC CSW", "110101010000100001111010010ttttt") +INST(DC_CISW, "DC CISW", "110101010000100001111110010ttttt") +INST(DC_ZVA, "DC ZVA", "110101010000101101110100001ttttt") +INST(DC_CVAC, "DC CVAC", 
"110101010000101101111010001ttttt") +INST(DC_CVAU, "DC CVAU", "110101010000101101111011001ttttt") +INST(DC_CVAP, "DC CVAP", "110101010000101101111100001ttttt") +INST(DC_CIVAC, "DC CIVAC", "110101010000101101111110001ttttt") +INST(IC_IVAU, "IC IVAU", "110101010000101101110101001ttttt") +INST(BLR, "BLR", "1101011000111111000000nnnnn00000") +INST(BR, "BR", "1101011000011111000000nnnnn00000") +INST(RET, "RET", "1101011001011111000000nnnnn00000") +INST(HINT, "HINT", "11010101000000110010MMMMooo11111") +INST(SXTB_1, "SXTB (32-bit)", "0001001100000000000111nnnnnddddd") +INST(SXTB_2, "SXTB (64-bit)", "1001001101000000000111nnnnnddddd") +INST(SXTH_1, "SXTH (32-bit)", "0001001100000000001111nnnnnddddd") +INST(SXTH_2, "SXTH (64-bit)", "1001001101000000001111nnnnnddddd") +INST(SXTW, "SXTW", "1001001101000000011111nnnnnddddd") +INST(REV32_int, "REV32", "1101101011000000000010nnnnnddddd") +INST(AESE, "AESE", "0100111000101000010010nnnnnddddd") +INST(AESD, "AESD", "0100111000101000010110nnnnnddddd") +INST(AESMC, "AESMC", "0100111000101000011010nnnnnddddd") +INST(AESIMC, "AESIMC", "0100111000101000011110nnnnnddddd") +INST(SHA1H, "SHA1H", "0101111000101000000010nnnnnddddd") +INST(SHA1SU1, "SHA1SU1", "0101111000101000000110nnnnnddddd") +INST(SHA256SU0, "SHA256SU0", "0101111000101000001010nnnnnddddd") +INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd") +INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") +INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") +INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") +INST(SHA512SU0, "SHA512SU0", "1100111011000000100000nnnnnddddd") +INST(SM4E, "SM4E", "1100111011000000100001nnnnnddddd") +INST(RBIT_int, "RBIT", "z101101011000000000000nnnnnddddd") +INST(REV16_int, "REV16", "z101101011000000000001nnnnnddddd") +INST(CLZ_int, "CLZ", "z101101011000000000100nnnnnddddd") +INST(CLS_int, "CLS", "z101101011000000000101nnnnnddddd") +INST(FCVTNS_2, "FCVTNS (vector)", "010111100z100001101010nnnnnddddd") 
+INST(FCVTMS_2, "FCVTMS (vector)", "010111100z100001101110nnnnnddddd") +INST(FCVTAS_2, "FCVTAS (vector)", "010111100z100001110010nnnnnddddd") +INST(SCVTF_int_2, "SCVTF (vector, integer)", "010111100z100001110110nnnnnddddd") +INST(FCMGT_zero_2, "FCMGT (zero)", "010111101z100000110010nnnnnddddd") +INST(FCMEQ_zero_2, "FCMEQ (zero)", "010111101z100000110110nnnnnddddd") +INST(FCMLT_2, "FCMLT (zero)", "010111101z100000111010nnnnnddddd") +INST(FCVTPS_2, "FCVTPS (vector)", "010111101z100001101010nnnnnddddd") +INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd") +INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") +INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") +INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") +INST(FCVTMU_2, "FCVTMU (vector)", "011111100z100001101110nnnnnddddd") +INST(FCVTAU_2, "FCVTAU (vector)", "011111100z100001110010nnnnnddddd") +INST(UCVTF_int_2, "UCVTF (vector, integer)", "011111100z100001110110nnnnnddddd") +INST(FCMGE_zero_2, "FCMGE (zero)", "011111101z100000110010nnnnnddddd") +INST(FCMLE_2, "FCMLE (zero)", "011111101z100000110110nnnnnddddd") +INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd") +INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd") +INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd") +INST(FCVTXN_1, "FCVTXN, FCVTXN2", "011111100z100001011010nnnnnddddd") +INST(FMAXNMP_pair_2, "FMAXNMP (scalar)", "011111100z110000110010nnnnnddddd") +INST(FADDP_pair_2, "FADDP (scalar)", "011111100z110000110110nnnnnddddd") +INST(FMAXP_pair_2, "FMAXP (scalar)", "011111100z110000111110nnnnnddddd") +INST(FMINNMP_pair_2, "FMINNMP (scalar)", "011111101z110000110010nnnnnddddd") +INST(FMINP_pair_2, "FMINP (scalar)", "011111101z110000111110nnnnnddddd") +INST(FRINTN_1, "FRINTN (vector)", "0Q00111001111001100010nnnnnddddd") +INST(FRINTM_1, "FRINTM (vector)", "0Q00111001111001100110nnnnnddddd") +INST(FCMEQ_zero_3, "FCMEQ (zero)", 
"0Q00111011111000110110nnnnnddddd") +INST(FABS_1, "FABS (vector)", "0Q00111011111000111110nnnnnddddd") +INST(FRINTP_1, "FRINTP (vector)", "0Q00111011111001100010nnnnnddddd") +INST(FRINTZ_1, "FRINTZ (vector)", "0Q00111011111001100110nnnnnddddd") +INST(FRECPE_3, "FRECPE", "0Q00111011111001110110nnnnnddddd") +INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd") +INST(FRINTX_1, "FRINTX (vector)", "0Q10111001111001100110nnnnnddddd") +INST(NOT, "NOT", "0Q10111000100000010110nnnnnddddd") +INST(RBIT_asimd, "RBIT (vector)", "0Q10111001100000010110nnnnnddddd") +INST(FNEG_1, "FNEG (vector)", "0Q10111011111000111110nnnnnddddd") +INST(FRINTI_1, "FRINTI (vector)", "0Q10111011111001100110nnnnnddddd") +INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") +INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt") +INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt") +INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt") +INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt") +INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt") +INST(LDAR, "LDARB, LDARH, LDAR", "zz00100011011111111111nnnnnttttt") +INST(REV, "REV", "z10110101100000000001onnnnnddddd") +INST(SUQADD_1, "SUQADD", "01011110zz100000001110nnnnnddddd") +INST(SQABS_1, "SQABS", "01011110zz100000011110nnnnnddddd") +INST(CMGT_zero_1, "CMGT (zero)", "01011110zz100000100010nnnnnddddd") +INST(CMEQ_zero_1, "CMEQ (zero)", "01011110zz100000100110nnnnnddddd") +INST(CMLT_1, "CMLT (zero)", "01011110zz100000101010nnnnnddddd") +INST(ABS_1, "ABS", "01011110zz100000101110nnnnnddddd") +INST(SQXTN_1, "SQXTN, SQXTN2", "01011110zz100001010010nnnnnddddd") +INST(USQADD_1, "USQADD", "01111110zz100000001110nnnnnddddd") +INST(SQNEG_1, "SQNEG", "01111110zz100000011110nnnnnddddd") +INST(CMGE_zero_1, "CMGE (zero)", "01111110zz100000100010nnnnnddddd") +INST(CMLE_1, "CMLE (zero)", "01111110zz100000100110nnnnnddddd") +INST(NEG_1, "NEG (vector)", 
"01111110zz100000101110nnnnnddddd") +INST(SQXTUN_1, "SQXTUN, SQXTUN2", "01111110zz100001001010nnnnnddddd") +INST(UQXTN_1, "UQXTN, UQXTN2", "01111110zz100001010010nnnnnddddd") +INST(ADDP_pair, "ADDP (scalar)", "01011110zz110001101110nnnnnddddd") +INST(FCVTN, "FCVTN, FCVTN2", "0Q0011100z100001011010nnnnnddddd") +INST(FCVTL, "FCVTL, FCVTL2", "0Q0011100z100001011110nnnnnddddd") +INST(FRINTN_2, "FRINTN (vector)", "0Q0011100z100001100010nnnnnddddd") +INST(FRINTM_2, "FRINTM (vector)", "0Q0011100z100001100110nnnnnddddd") +INST(FCVTNS_4, "FCVTNS (vector)", "0Q0011100z100001101010nnnnnddddd") +INST(FCVTMS_4, "FCVTMS (vector)", "0Q0011100z100001101110nnnnnddddd") +INST(FCVTAS_4, "FCVTAS (vector)", "0Q0011100z100001110010nnnnnddddd") +INST(SCVTF_int_4, "SCVTF (vector, integer)", "0Q0011100z100001110110nnnnnddddd") +INST(FCMGT_zero_4, "FCMGT (zero)", "0Q0011101z100000110010nnnnnddddd") +INST(FCMEQ_zero_4, "FCMEQ (zero)", "0Q0011101z100000110110nnnnnddddd") +INST(FCMLT_4, "FCMLT (zero)", "0Q0011101z100000111010nnnnnddddd") +INST(FABS_2, "FABS (vector)", "0Q0011101z100000111110nnnnnddddd") +INST(FRINTP_2, "FRINTP (vector)", "0Q0011101z100001100010nnnnnddddd") +INST(FRINTZ_2, "FRINTZ (vector)", "0Q0011101z100001100110nnnnnddddd") +INST(FCVTPS_4, "FCVTPS (vector)", "0Q0011101z100001101010nnnnnddddd") +INST(FCVTZS_int_4, "FCVTZS (vector, integer)", "0Q0011101z100001101110nnnnnddddd") +INST(URECPE, "URECPE", "0Q0011101z100001110010nnnnnddddd") +INST(FRECPE_4, "FRECPE", "0Q0011101z100001110110nnnnnddddd") +INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd") +INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd") +INST(FRINTX_2, "FRINTX (vector)", "0Q1011100z100001100110nnnnnddddd") +INST(FCVTNU_4, "FCVTNU (vector)", "0Q1011100z100001101010nnnnnddddd") +INST(FCVTMU_4, "FCVTMU (vector)", "0Q1011100z100001101110nnnnnddddd") +INST(FCVTAU_4, "FCVTAU (vector)", "0Q1011100z100001110010nnnnnddddd") +INST(UCVTF_int_4, "UCVTF (vector, integer)", 
"0Q1011100z100001110110nnnnnddddd") +INST(FNEG_2, "FNEG (vector)", "0Q1011101z100000111110nnnnnddddd") +INST(FRINTI_2, "FRINTI (vector)", "0Q1011101z100001100110nnnnnddddd") +INST(FCMGE_zero_4, "FCMGE (zero)", "0Q1011101z100000110010nnnnnddddd") +INST(FCMLE_4, "FCMLE (zero)", "0Q1011101z100000110110nnnnnddddd") +INST(FCVTPU_4, "FCVTPU (vector)", "0Q1011101z100001101010nnnnnddddd") +INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") +INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") +INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") +INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") +INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") +INST(FMAXV_2, "FMAXV", "0Q1011100z110000111110nnnnnddddd") +INST(FMINNMV_2, "FMINNMV", "0Q1011101z110000110010nnnnnddddd") +INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") +INST(FMOV_float, "FMOV (register)", "00011110yy100000010000nnnnnddddd") +INST(FABS_float, "FABS (scalar)", "00011110yy100000110000nnnnnddddd") +INST(FNEG_float, "FNEG (scalar)", "00011110yy100001010000nnnnnddddd") +INST(FSQRT_float, "FSQRT (scalar)", "00011110yy100001110000nnnnnddddd") +INST(FRINTN_float, "FRINTN (scalar)", "00011110yy100100010000nnnnnddddd") +INST(FRINTP_float, "FRINTP (scalar)", "00011110yy100100110000nnnnnddddd") +INST(FRINTM_float, "FRINTM (scalar)", "00011110yy100101010000nnnnnddddd") +INST(FRINTZ_float, "FRINTZ (scalar)", "00011110yy100101110000nnnnnddddd") +INST(FRINTA_float, "FRINTA (scalar)", "00011110yy100110010000nnnnnddddd") +INST(FRINTX_float, "FRINTX (scalar)", "00011110yy100111010000nnnnnddddd") +INST(FRINTI_float, "FRINTI (scalar)", "00011110yy100111110000nnnnnddddd") +INST(LD1R_1, "LD1R", "0Q001101010000001100zznnnnnttttt") +INST(LD3R_1, "LD3R", "0Q001101010000001110zznnnnnttttt") +INST(LD2R_1, "LD2R", "0Q001101011000001100zznnnnnttttt") +INST(LD4R_1, "LD4R", "0Q001101011000001110zznnnnnttttt") +INST(REV64_asimd, "REV64", 
"0Q001110zz100000000010nnnnnddddd") +INST(REV16_asimd, "REV16 (vector)", "0Q001110zz100000000110nnnnnddddd") +INST(SADDLP, "SADDLP", "0Q001110zz100000001010nnnnnddddd") +INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd") +INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd") +INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd") +INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd") +INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd") +INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd") +INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd") +INST(CMLT_2, "CMLT (zero)", "0Q001110zz100000101010nnnnnddddd") +INST(ABS_2, "ABS", "0Q001110zz100000101110nnnnnddddd") +INST(XTN, "XTN, XTN2", "0Q001110zz100001001010nnnnnddddd") +INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd") +INST(REV32_asimd, "REV32 (vector)", "0Q101110zz100000000010nnnnnddddd") +INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd") +INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd") +INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd") +INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd") +INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd") +INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd") +INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd") +INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd") +INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd") +INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd") +INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd") +INST(SADDLV, "SADDLV", "0Q001110zz110000001110nnnnnddddd") +INST(SMAXV, "SMAXV", "0Q001110zz110000101010nnnnnddddd") +INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") +INST(ADDV, "ADDV", "0Q001110zz110001101110nnnnnddddd") +INST(UADDLV, "UADDLV", "0Q101110zz110000001110nnnnnddddd") +INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") 
+INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") +INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") +INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") +INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") +INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") +INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") +INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") +INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") +INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") +INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") +INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") +INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") +INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") +INST(FCMP_float, "FCMP", "00011110yy1mmmmm001000nnnnn0o000") +INST(FCMPE_float, "FCMPE", "00011110yy1mmmmm001000nnnnn1o000") +INST(FCVT_float, "FCVT", "00011110yy10001oo10000nnnnnddddd") +INST(ASR_1, "ASR (immediate, 32-bit)", "00010011000rrrrr011111nnnnnddddd") +INST(RMIF, "RMIF", "10111010000iiiiii00001nnnnn0IIII") +INST(SMULH, "SMULH", "10011011010mmmmm011111nnnnnddddd") +INST(UMULH, "UMULH", "10011011110mmmmm011111nnnnnddddd") +INST(SHA1C, "SHA1C", "01011110000mmmmm000000nnnnnddddd") +INST(SHA1P, "SHA1P", "01011110000mmmmm000100nnnnnddddd") +INST(SHA1M, "SHA1M", "01011110000mmmmm001000nnnnnddddd") +INST(SHA1SU0, "SHA1SU0", "01011110000mmmmm001100nnnnnddddd") +INST(SHA256H, "SHA256H", "01011110000mmmmm010000nnnnnddddd") +INST(SHA256H2, "SHA256H2", "01011110000mmmmm010100nnnnnddddd") +INST(SHA256SU1, "SHA256SU1", "01011110000mmmmm011000nnnnnddddd") +INST(DUP_elt_1, "DUP (element)", "01011110000iiiii000001nnnnnddddd") +INST(FCMEQ_reg_1, "FCMEQ (register)", 
"01011110010mmmmm001001nnnnnddddd") +INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd") +INST(FRSQRTS_1, "FRSQRTS", "01011110110mmmmm001111nnnnnddddd") +INST(INS_gen, "INS (general)", "01001110000iiiii000111nnnnnddddd") +INST(SHA512H, "SHA512H", "11001110011mmmmm100000nnnnnddddd") +INST(SHA512H2, "SHA512H2", "11001110011mmmmm100001nnnnnddddd") +INST(SHA512SU1, "SHA512SU1", "11001110011mmmmm100010nnnnnddddd") +INST(RAX1, "RAX1", "11001110011mmmmm100011nnnnnddddd") +INST(SM3PARTW1, "SM3PARTW1", "11001110011mmmmm110000nnnnnddddd") +INST(SM3PARTW2, "SM3PARTW2", "11001110011mmmmm110001nnnnnddddd") +INST(SM4EKEY, "SM4EKEY", "11001110011mmmmm110010nnnnnddddd") +INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") +INST(FMOV_float_imm, "FMOV (scalar, immediate)", "00011110yy1iiiiiiii10000000ddddd") +INST(ASR_2, "ASR (immediate, 64-bit)", "1001001101rrrrrr111111nnnnnddddd") +INST(SVC, "SVC", "11010100000iiiiiiiiiiiiiiii00001") +INST(BRK, "BRK", "11010100001iiiiiiiiiiiiiiii00000") +INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000oo0Szznnnnnttttt") +INST(ST3_sngl_1, "ST3 (single structure)", "0Q00110100000000oo1Szznnnnnttttt") +INST(ST2_sngl_1, "ST2 (single structure)", "0Q00110100100000oo0Szznnnnnttttt") +INST(ST4_sngl_1, "ST4 (single structure)", "0Q00110100100000oo1Szznnnnnttttt") +INST(LD1_sngl_1, "LD1 (single structure)", "0Q00110101000000oo0Szznnnnnttttt") +INST(LD3_sngl_1, "LD3 (single structure)", "0Q00110101000000oo1Szznnnnnttttt") +INST(LD2_sngl_1, "LD2 (single structure)", "0Q00110101100000oo0Szznnnnnttttt") +INST(LD4_sngl_1, "LD4 (single structure)", "0Q00110101100000oo1Szznnnnnttttt") +INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt") +INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt") +INST(UDIV, "UDIV", "z0011010110mmmmm000010nnnnnddddd") +INST(SDIV, "SDIV", "z0011010110mmmmm000011nnnnnddddd") +INST(LSLV, "LSLV", "z0011010110mmmmm001000nnnnnddddd") +INST(LSRV, "LSRV", "z0011010110mmmmm001001nnnnnddddd") 
+INST(ASRV, "ASRV", "z0011010110mmmmm001010nnnnnddddd") +INST(RORV, "RORV", "z0011010110mmmmm001011nnnnnddddd") +INST(ADC, "ADC", "z0011010000mmmmm000000nnnnnddddd") +INST(ADCS, "ADCS", "z0111010000mmmmm000000nnnnnddddd") +INST(SBC, "SBC", "z1011010000mmmmm000000nnnnnddddd") +INST(SBCS, "SBCS", "z1111010000mmmmm000000nnnnnddddd") +INST(FMULX_vec_2, "FMULX", "010111100z1mmmmm110111nnnnnddddd") +INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd") +INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd") +INST(FRSQRTS_2, "FRSQRTS", "010111101z1mmmmm111111nnnnnddddd") +INST(FCMGE_reg_2, "FCMGE (register)", "011111100z1mmmmm111001nnnnnddddd") +INST(FACGE_2, "FACGE", "011111100z1mmmmm111011nnnnnddddd") +INST(FABD_2, "FABD", "011111101z1mmmmm110101nnnnnddddd") +INST(FCMGT_reg_2, "FCMGT (register)", "011111101z1mmmmm111001nnnnnddddd") +INST(FACGT_2, "FACGT", "011111101z1mmmmm111011nnnnnddddd") +INST(DUP_elt_2, "DUP (element)", "0Q001110000iiiii000001nnnnnddddd") +INST(DUP_gen, "DUP (general)", "0Q001110000iiiii000011nnnnnddddd") +INST(SMOV, "SMOV", "0Q001110000iiiii001011nnnnnddddd") +INST(UMOV, "UMOV", "0Q001110000iiiii001111nnnnnddddd") +INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd") +INST(FRECPS_3, "FRECPS", "0Q001110010mmmmm001111nnnnnddddd") +INST(FRSQRTS_3, "FRSQRTS", "0Q001110110mmmmm001111nnnnnddddd") +INST(FMLA_vec_1, "FMLA (vector)", "0Q001110010mmmmm000011nnnnnddddd") +INST(FMLS_vec_1, "FMLS (vector)", "0Q001110110mmmmm000011nnnnnddddd") +INST(AND_asimd, "AND (vector)", "0Q001110001mmmmm000111nnnnnddddd") +INST(BIC_asimd_reg, "BIC (vector, register)", "0Q001110011mmmmm000111nnnnnddddd") +INST(ORR_asimd_reg, "ORR (vector, register)", "0Q001110101mmmmm000111nnnnnddddd") +INST(ORN_asimd, "ORN (vector)", "0Q001110111mmmmm000111nnnnnddddd") +INST(EOR_asimd, "EOR (vector)", "0Q101110001mmmmm000111nnnnnddddd") +INST(BSL, "BSL", "0Q101110011mmmmm000111nnnnnddddd") +INST(BIT, "BIT", "0Q101110101mmmmm000111nnnnnddddd") 
+INST(BIF, "BIF", "0Q101110111mmmmm000111nnnnnddddd") +INST(STx_mult_1, "STx (multiple structures)", "0Q00110000000000oooozznnnnnttttt") +INST(LDx_mult_1, "LDx (multiple structures)", "0Q00110001000000oooozznnnnnttttt") +INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt") +INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt") +INST(SQADD_1, "SQADD", "01011110zz1mmmmm000011nnnnnddddd") +INST(SQSUB_1, "SQSUB", "01011110zz1mmmmm001011nnnnnddddd") +INST(CMGT_reg_1, "CMGT (register)", "01011110zz1mmmmm001101nnnnnddddd") +INST(CMGE_reg_1, "CMGE (register)", "01011110zz1mmmmm001111nnnnnddddd") +INST(SSHL_1, "SSHL", "01011110zz1mmmmm010001nnnnnddddd") +INST(SQSHL_reg_1, "SQSHL (register)", "01011110zz1mmmmm010011nnnnnddddd") +INST(SRSHL_1, "SRSHL", "01011110zz1mmmmm010101nnnnnddddd") +INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd") +INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd") +INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd") +INST(UQADD_1, "UQADD", "01111110zz1mmmmm000011nnnnnddddd") +INST(UQSUB_1, "UQSUB", "01111110zz1mmmmm001011nnnnnddddd") +INST(CMHI_1, "CMHI (register)", "01111110zz1mmmmm001101nnnnnddddd") +INST(CMHS_1, "CMHS (register)", "01111110zz1mmmmm001111nnnnnddddd") +INST(USHL_1, "USHL", "01111110zz1mmmmm010001nnnnnddddd") +INST(UQSHL_reg_1, "UQSHL (register)", "01111110zz1mmmmm010011nnnnnddddd") +INST(URSHL_1, "URSHL", "01111110zz1mmmmm010101nnnnnddddd") +INST(SUB_1, "SUB (vector)", "01111110zz1mmmmm100001nnnnnddddd") +INST(CMEQ_reg_1, "CMEQ (register)", "01111110zz1mmmmm100011nnnnnddddd") +INST(SQRDMULH_vec_1, "SQRDMULH (vector)", "01111110zz1mmmmm101101nnnnnddddd") +INST(SSHR_1, "SSHR", "010111110IIIIiii000001nnnnnddddd") +INST(SSRA_1, "SSRA", "010111110IIIIiii000101nnnnnddddd") +INST(SRSHR_1, "SRSHR", "010111110IIIIiii001001nnnnnddddd") +INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd") +INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd") 
+INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd") +INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd") +INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd") +INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd") +INST(USHR_1, "USHR", "011111110IIIIiii000001nnnnnddddd") +INST(USRA_1, "USRA", "011111110IIIIiii000101nnnnnddddd") +INST(URSHR_1, "URSHR", "011111110IIIIiii001001nnnnnddddd") +INST(URSRA_1, "URSRA", "011111110IIIIiii001101nnnnnddddd") +INST(SRI_1, "SRI", "011111110IIIIiii010001nnnnnddddd") +INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd") +INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd") +INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd") +INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd") +INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd") +INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd") +INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd") +INST(FMLA_elt_1, "FMLA (by element)", "0101111100LMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_1, "FMLS (by element)", "0101111100LMmmmm0101H0nnnnnddddd") +INST(FMAXNM_2, "FMAXNM (vector)", "0Q0011100z1mmmmm110001nnnnnddddd") +INST(FMLA_vec_2, "FMLA (vector)", "0Q0011100z1mmmmm110011nnnnnddddd") +INST(FADD_2, "FADD (vector)", "0Q0011100z1mmmmm110101nnnnnddddd") +INST(FMAX_2, "FMAX (vector)", "0Q0011100z1mmmmm111101nnnnnddddd") +INST(FMULX_vec_4, "FMULX", "0Q0011100z1mmmmm110111nnnnnddddd") +INST(FCMEQ_reg_4, "FCMEQ (register)", "0Q0011100z1mmmmm111001nnnnnddddd") +INST(FRECPS_4, "FRECPS", "0Q0011100z1mmmmm111111nnnnnddddd") +INST(FMINNM_2, "FMINNM (vector)", "0Q0011101z1mmmmm110001nnnnnddddd") +INST(FMLS_vec_2, "FMLS (vector)", "0Q0011101z1mmmmm110011nnnnnddddd") +INST(FSUB_2, "FSUB (vector)", "0Q0011101z1mmmmm110101nnnnnddddd") +INST(FMIN_2, "FMIN (vector)", 
"0Q0011101z1mmmmm111101nnnnnddddd") +INST(FRSQRTS_4, "FRSQRTS", "0Q0011101z1mmmmm111111nnnnnddddd") +INST(FMAXNMP_vec_2, "FMAXNMP (vector)", "0Q1011100z1mmmmm110001nnnnnddddd") +INST(FADDP_vec_2, "FADDP (vector)", "0Q1011100z1mmmmm110101nnnnnddddd") +INST(FMUL_vec_2, "FMUL (vector)", "0Q1011100z1mmmmm110111nnnnnddddd") +INST(FCMGE_reg_4, "FCMGE (register)", "0Q1011100z1mmmmm111001nnnnnddddd") +INST(FACGE_4, "FACGE", "0Q1011100z1mmmmm111011nnnnnddddd") +INST(FMAXP_vec_2, "FMAXP (vector)", "0Q1011100z1mmmmm111101nnnnnddddd") +INST(FDIV_2, "FDIV (vector)", "0Q1011100z1mmmmm111111nnnnnddddd") +INST(FMINNMP_vec_2, "FMINNMP (vector)", "0Q1011101z1mmmmm110001nnnnnddddd") +INST(FABD_4, "FABD", "0Q1011101z1mmmmm110101nnnnnddddd") +INST(FCMGT_reg_4, "FCMGT (register)", "0Q1011101z1mmmmm111001nnnnnddddd") +INST(FACGT_4, "FACGT", "0Q1011101z1mmmmm111011nnnnnddddd") +INST(FMINP_vec_2, "FMINP (vector)", "0Q1011101z1mmmmm111101nnnnnddddd") +INST(SM3TT1A, "SM3TT1A", "11001110010mmmmm10ii00nnnnnddddd") +INST(SM3TT1B, "SM3TT1B", "11001110010mmmmm10ii01nnnnnddddd") +INST(SM3TT2A, "SM3TT2A", "11001110010mmmmm10ii10nnnnnddddd") +INST(SM3TT2B, "SM3TT2B", "11001110010mmmmm10ii11nnnnnddddd") +INST(FMUL_float, "FMUL (scalar)", "00011110yy1mmmmm000010nnnnnddddd") +INST(FDIV_float, "FDIV (scalar)", "00011110yy1mmmmm000110nnnnnddddd") +INST(FADD_float, "FADD (scalar)", "00011110yy1mmmmm001010nnnnnddddd") +INST(FSUB_float, "FSUB (scalar)", "00011110yy1mmmmm001110nnnnnddddd") +INST(FMAX_float, "FMAX (scalar)", "00011110yy1mmmmm010010nnnnnddddd") +INST(FMIN_float, "FMIN (scalar)", "00011110yy1mmmmm010110nnnnnddddd") +INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd") +INST(FMINNM_float, "FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd") +INST(FNMUL_float, "FNMUL (scalar)", "00011110yy1mmmmm100010nnnnnddddd") +INST(LD1R_2, "LD1R", "0Q001101110mmmmm1100zznnnnnttttt") +INST(LD3R_2, "LD3R", "0Q001101110mmmmm1110zznnnnnttttt") +INST(LD2R_2, "LD2R", 
"0Q001101111mmmmm1100zznnnnnttttt") +INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt") +INST(CRC32, "CRC32B, CRC32H, CRC32W, CRC32X", "z0011010110mmmmm0100zznnnnnddddd") +INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd") +INST(FMLA_elt_2, "FMLA (by element)", "010111111zLMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_2, "FMLS (by element)", "010111111zLMmmmm0101H0nnnnnddddd") +INST(FMUL_elt_2, "FMUL (by element)", "010111111zLMmmmm1001H0nnnnnddddd") +INST(FMULX_elt_2, "FMULX (by element)", "011111111zLMmmmm1001H0nnnnnddddd") +INST(TBL, "TBL", "0Q001110000mmmmm0LL000nnnnnddddd") +INST(TBX, "TBX", "0Q001110000mmmmm0LL100nnnnnddddd") +INST(UZP1, "UZP1", "0Q001110zz0mmmmm000110nnnnnddddd") +INST(TRN1, "TRN1", "0Q001110zz0mmmmm001010nnnnnddddd") +INST(ZIP1, "ZIP1", "0Q001110zz0mmmmm001110nnnnnddddd") +INST(UZP2, "UZP2", "0Q001110zz0mmmmm010110nnnnnddddd") +INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd") +INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd") +INST(SDOT_vec, "SDOT (vector)", "0Q001110zz0mmmmm100101nnnnnddddd") +INST(UDOT_vec, "UDOT (vector)", "0Q101110zz0mmmmm100101nnnnnddddd") +INST(SADDL, "SADDL, SADDL2", "0Q001110zz1mmmmm000000nnnnnddddd") +INST(SADDW, "SADDW, SADDW2", "0Q001110zz1mmmmm000100nnnnnddddd") +INST(SSUBL, "SSUBL, SSUBL2", "0Q001110zz1mmmmm001000nnnnnddddd") +INST(SSUBW, "SSUBW, SSUBW2", "0Q001110zz1mmmmm001100nnnnnddddd") +INST(ADDHN, "ADDHN, ADDHN2", "0Q001110zz1mmmmm010000nnnnnddddd") +INST(SABAL, "SABAL, SABAL2", "0Q001110zz1mmmmm010100nnnnnddddd") +INST(SUBHN, "SUBHN, SUBHN2", "0Q001110zz1mmmmm011000nnnnnddddd") +INST(SABDL, "SABDL, SABDL2", "0Q001110zz1mmmmm011100nnnnnddddd") +INST(SMLAL_vec, "SMLAL, SMLAL2 (vector)", "0Q001110zz1mmmmm100000nnnnnddddd") +INST(SMLSL_vec, "SMLSL, SMLSL2 (vector)", "0Q001110zz1mmmmm101000nnnnnddddd") +INST(SMULL_vec, "SMULL, SMULL2 (vector)", "0Q001110zz1mmmmm110000nnnnnddddd") +INST(PMULL, "PMULL, PMULL2", "0Q001110zz1mmmmm111000nnnnnddddd") +INST(UADDL, 
"UADDL, UADDL2", "0Q101110zz1mmmmm000000nnnnnddddd") +INST(UADDW, "UADDW, UADDW2", "0Q101110zz1mmmmm000100nnnnnddddd") +INST(USUBL, "USUBL, USUBL2", "0Q101110zz1mmmmm001000nnnnnddddd") +INST(USUBW, "USUBW, USUBW2", "0Q101110zz1mmmmm001100nnnnnddddd") +INST(RADDHN, "RADDHN, RADDHN2", "0Q101110zz1mmmmm010000nnnnnddddd") +INST(UABAL, "UABAL, UABAL2", "0Q101110zz1mmmmm010100nnnnnddddd") +INST(RSUBHN, "RSUBHN, RSUBHN2", "0Q101110zz1mmmmm011000nnnnnddddd") +INST(UABDL, "UABDL, UABDL2", "0Q101110zz1mmmmm011100nnnnnddddd") +INST(UMLAL_vec, "UMLAL, UMLAL2 (vector)", "0Q101110zz1mmmmm100000nnnnnddddd") +INST(UMLSL_vec, "UMLSL, UMLSL2 (vector)", "0Q101110zz1mmmmm101000nnnnnddddd") +INST(UMULL_vec, "UMULL, UMULL2 (vector)", "0Q101110zz1mmmmm110000nnnnnddddd") +INST(SQDMULL_vec_2, "SQDMULL, SQDMULL2 (vector)", "0Q001110zz1mmmmm110100nnnnnddddd") +INST(SHADD, "SHADD", "0Q001110zz1mmmmm000001nnnnnddddd") +INST(SQADD_2, "SQADD", "0Q001110zz1mmmmm000011nnnnnddddd") +INST(SRHADD, "SRHADD", "0Q001110zz1mmmmm000101nnnnnddddd") +INST(SHSUB, "SHSUB", "0Q001110zz1mmmmm001001nnnnnddddd") +INST(SQSUB_2, "SQSUB", "0Q001110zz1mmmmm001011nnnnnddddd") +INST(CMGT_reg_2, "CMGT (register)", "0Q001110zz1mmmmm001101nnnnnddddd") +INST(CMGE_reg_2, "CMGE (register)", "0Q001110zz1mmmmm001111nnnnnddddd") +INST(SSHL_2, "SSHL", "0Q001110zz1mmmmm010001nnnnnddddd") +INST(SQSHL_reg_2, "SQSHL (register)", "0Q001110zz1mmmmm010011nnnnnddddd") +INST(SRSHL_2, "SRSHL", "0Q001110zz1mmmmm010101nnnnnddddd") +INST(SMAX, "SMAX", "0Q001110zz1mmmmm011001nnnnnddddd") +INST(SMIN, "SMIN", "0Q001110zz1mmmmm011011nnnnnddddd") +INST(SABD, "SABD", "0Q001110zz1mmmmm011101nnnnnddddd") +INST(SABA, "SABA", "0Q001110zz1mmmmm011111nnnnnddddd") +INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd") +INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd") +INST(MLA_vec, "MLA (vector)", "0Q001110zz1mmmmm100101nnnnnddddd") +INST(MUL_vec, "MUL (vector)", "0Q001110zz1mmmmm100111nnnnnddddd") +INST(SMAXP, "SMAXP", 
"0Q001110zz1mmmmm101001nnnnnddddd") +INST(SMINP, "SMINP", "0Q001110zz1mmmmm101011nnnnnddddd") +INST(SQDMULH_vec_2, "SQDMULH (vector)", "0Q001110zz1mmmmm101101nnnnnddddd") +INST(ADDP_vec, "ADDP (vector)", "0Q001110zz1mmmmm101111nnnnnddddd") +INST(UHADD, "UHADD", "0Q101110zz1mmmmm000001nnnnnddddd") +INST(UQADD_2, "UQADD", "0Q101110zz1mmmmm000011nnnnnddddd") +INST(URHADD, "URHADD", "0Q101110zz1mmmmm000101nnnnnddddd") +INST(UHSUB, "UHSUB", "0Q101110zz1mmmmm001001nnnnnddddd") +INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd") +INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd") +INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd") +INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd") +INST(UQSHL_reg_2, "UQSHL (register)", "0Q101110zz1mmmmm010011nnnnnddddd") +INST(URSHL_2, "URSHL", "0Q101110zz1mmmmm010101nnnnnddddd") +INST(UMAX, "UMAX", "0Q101110zz1mmmmm011001nnnnnddddd") +INST(UMIN, "UMIN", "0Q101110zz1mmmmm011011nnnnnddddd") +INST(UABD, "UABD", "0Q101110zz1mmmmm011101nnnnnddddd") +INST(UABA, "UABA", "0Q101110zz1mmmmm011111nnnnnddddd") +INST(SUB_2, "SUB (vector)", "0Q101110zz1mmmmm100001nnnnnddddd") +INST(CMEQ_reg_2, "CMEQ (register)", "0Q101110zz1mmmmm100011nnnnnddddd") +INST(MLS_vec, "MLS (vector)", "0Q101110zz1mmmmm100101nnnnnddddd") +INST(PMUL, "PMUL", "0Q101110zz1mmmmm100111nnnnnddddd") +INST(UMAXP, "UMAXP", "0Q101110zz1mmmmm101001nnnnnddddd") +INST(UMINP, "UMINP", "0Q101110zz1mmmmm101011nnnnnddddd") +INST(SQRDMULH_vec_2, "SQRDMULH (vector)", "0Q101110zz1mmmmm101101nnnnnddddd") +INST(SSHR_2, "SSHR", "0Q0011110IIIIiii000001nnnnnddddd") +INST(SSRA_2, "SSRA", "0Q0011110IIIIiii000101nnnnnddddd") +INST(SRSHR_2, "SRSHR", "0Q0011110IIIIiii001001nnnnnddddd") +INST(SRSRA_2, "SRSRA", "0Q0011110IIIIiii001101nnnnnddddd") +INST(SHL_2, "SHL", "0Q0011110IIIIiii010101nnnnnddddd") +INST(SQSHL_imm_2, "SQSHL (immediate)", "0Q0011110IIIIiii011101nnnnnddddd") +INST(SHRN, "SHRN, SHRN2", "0Q0011110IIIIiii100001nnnnnddddd") +INST(RSHRN, "RSHRN, 
RSHRN2", "0Q0011110IIIIiii100011nnnnnddddd") +INST(SQSHRN_2, "SQSHRN, SQSHRN2", "0Q0011110IIIIiii100101nnnnnddddd") +INST(SQRSHRN_2, "SQRSHRN, SQRSHRN2", "0Q0011110IIIIiii100111nnnnnddddd") +INST(SSHLL, "SSHLL, SSHLL2", "0Q0011110IIIIiii101001nnnnnddddd") +INST(SCVTF_fix_2, "SCVTF (vector, fixed-point)", "0Q0011110IIIIiii111001nnnnnddddd") +INST(FCVTZS_fix_2, "FCVTZS (vector, fixed-point)", "0Q0011110IIIIiii111111nnnnnddddd") +INST(USHR_2, "USHR", "0Q1011110IIIIiii000001nnnnnddddd") +INST(USRA_2, "USRA", "0Q1011110IIIIiii000101nnnnnddddd") +INST(URSHR_2, "URSHR", "0Q1011110IIIIiii001001nnnnnddddd") +INST(URSRA_2, "URSRA", "0Q1011110IIIIiii001101nnnnnddddd") +INST(SRI_2, "SRI", "0Q1011110IIIIiii010001nnnnnddddd") +INST(SLI_2, "SLI", "0Q1011110IIIIiii010101nnnnnddddd") +INST(SQSHLU_2, "SQSHLU", "0Q1011110IIIIiii011001nnnnnddddd") +INST(UQSHL_imm_2, "UQSHL (immediate)", "0Q1011110IIIIiii011101nnnnnddddd") +INST(SQSHRUN_2, "SQSHRUN, SQSHRUN2", "0Q1011110IIIIiii100001nnnnnddddd") +INST(SQRSHRUN_2, "SQRSHRUN, SQRSHRUN2", "0Q1011110IIIIiii100011nnnnnddddd") +INST(UQSHRN_2, "UQSHRN, UQSHRN2", "0Q1011110IIIIiii100101nnnnnddddd") +INST(UQRSHRN_2, "UQRSHRN, UQRSHRN2", "0Q1011110IIIIiii100111nnnnnddddd") +INST(USHLL, "USHLL, USHLL2", "0Q1011110IIIIiii101001nnnnnddddd") +INST(UCVTF_fix_2, "UCVTF (vector, fixed-point)", "0Q1011110IIIIiii111001nnnnnddddd") +INST(FCVTZU_fix_2, "FCVTZU (vector, fixed-point)", "0Q1011110IIIIiii111111nnnnnddddd") +INST(FMLA_elt_3, "FMLA (by element)", "0Q00111100LMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_3, "FMLS (by element)", "0Q00111100LMmmmm0101H0nnnnnddddd") +INST(UnallocatedEncoding, "", "10111000110---------00----------") +INST(PRFM_unscaled_imm, "PRFM (unscaled offset)", "11111000100iiiiiiiii00nnnnnttttt") +INST(STTRB, "STTRB", "00111000000iiiiiiiii10nnnnnttttt") +INST(LDTRB, "LDTRB", "00111000010iiiiiiiii10nnnnnttttt") +INST(STTRH, "STTRH", "01111000000iiiiiiiii10nnnnnttttt") +INST(LDTRH, "LDTRH", "01111000010iiiiiiiii10nnnnnttttt") 
+INST(LDTRSW, "LDTRSW", "10111000100iiiiiiiii10nnnnnttttt") +INST(CCMN_reg, "CCMN (register)", "z0111010010mmmmmcccc00nnnnn0ffff") +INST(CCMP_reg, "CCMP (register)", "z1111010010mmmmmcccc00nnnnn0ffff") +INST(CCMN_imm, "CCMN (immediate)", "z0111010010iiiiicccc10nnnnn0ffff") +INST(CCMP_imm, "CCMP (immediate)", "z1111010010iiiiicccc10nnnnn0ffff") +INST(SQDMULL_elt_1, "SQDMULL, SQDMULL2 (by element)", "01011111zzLMmmmm1011H0nnnnnddddd") +INST(SQDMULH_elt_1, "SQDMULH (by element)", "01011111zzLMmmmm1100H0nnnnnddddd") +INST(SQRDMULH_elt_1, "SQRDMULH (by element)", "01011111zzLMmmmm1101H0nnnnnddddd") +INST(INS_elt, "INS (element)", "01101110000iiiii0iiii1nnnnnddddd") +INST(FCADD_vec, "FCADD", "0Q101110zz0mmmmm111r01nnnnnddddd") +INST(FMLA_elt_4, "FMLA (by element)", "0Q0011111zLMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_4, "FMLS (by element)", "0Q0011111zLMmmmm0101H0nnnnnddddd") +INST(FMUL_elt_4, "FMUL (by element)", "0Q0011111zLMmmmm1001H0nnnnnddddd") +INST(FMULX_elt_4, "FMULX (by element)", "0Q1011111zLMmmmm1001H0nnnnnddddd") +INST(SCVTF_float_fix, "SCVTF (scalar, fixed-point)", "z0011110yy000010ppppppnnnnnddddd") +INST(UCVTF_float_fix, "UCVTF (scalar, fixed-point)", "z0011110yy000011ppppppnnnnnddddd") +INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011110yy011000ppppppnnnnnddddd") +INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011110yy011001ppppppnnnnnddddd") +INST(MSR_reg, "MSR (register)", "110101010001poooNNNNMMMMooottttt") +INST(MRS, "MRS", "110101010011poooNNNNMMMMooottttt") +INST(UnallocatedEncoding, "", "111110001-0---------00----------") +INST(UnallocatedEncoding, "", "10111000110----------1----------") +INST(CSEL, "CSEL", "z0011010100mmmmmcccc00nnnnnddddd") +INST(CSINC, "CSINC", "z0011010100mmmmmcccc01nnnnnddddd") +INST(CSINV, "CSINV", "z1011010100mmmmmcccc00nnnnnddddd") +INST(CSNEG, "CSNEG", "z1011010100mmmmmcccc01nnnnnddddd") +INST(SMADDL, "SMADDL", "10011011001mmmmm0aaaaannnnnddddd") +INST(SMSUBL, "SMSUBL", 
"10011011001mmmmm1aaaaannnnnddddd") +INST(UMADDL, "UMADDL", "10011011101mmmmm0aaaaannnnnddddd") +INST(UMSUBL, "UMSUBL", "10011011101mmmmm1aaaaannnnnddddd") +INST(EXT, "EXT", "0Q101110000mmmmm0iiii0nnnnnddddd") +INST(FCMLA_vec, "FCMLA", "0Q101110zz0mmmmm110rr1nnnnnddddd") +INST(SMLAL_elt, "SMLAL, SMLAL2 (by element)", "0Q001111zzLMmmmm0010H0nnnnnddddd") +INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") +INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") +INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd") +INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd") +INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd") +INST(SQRDMULH_elt_2, "SQRDMULH (by element)", "0Q001111zzLMmmmm1101H0nnnnnddddd") +INST(SDOT_elt, "SDOT (by element)", "0Q001111zzLMmmmm1110H0nnnnnddddd") +INST(MLA_elt, "MLA (by element)", "0Q101111zzLMmmmm0000H0nnnnnddddd") +INST(UMLAL_elt, "UMLAL, UMLAL2 (by element)", "0Q101111zzLMmmmm0010H0nnnnnddddd") +INST(MLS_elt, "MLS (by element)", "0Q101111zzLMmmmm0100H0nnnnnddddd") +INST(UMLSL_elt, "UMLSL, UMLSL2 (by element)", "0Q101111zzLMmmmm0110H0nnnnnddddd") +INST(UMULL_elt, "UMULL, UMULL2 (by element)", "0Q101111zzLMmmmm1010H0nnnnnddddd") +INST(UDOT_elt, "UDOT (by element)", "0Q101111zzLMmmmm1110H0nnnnnddddd") +INST(EOR3, "EOR3", "11001110000mmmmm0aaaaannnnnddddd") +INST(BCAX, "BCAX", "11001110001mmmmm0aaaaannnnnddddd") +INST(SM3SS1, "SM3SS1", "11001110010mmmmm0aaaaannnnnddddd") +INST(FCCMP_float, "FCCMP", "00011110yy1mmmmmcccc01nnnnn0ffff") +INST(FCCMPE_float, "FCCMPE", "00011110yy1mmmmmcccc01nnnnn1ffff") +INST(ST1_sngl_2, "ST1 (single structure)", "0Q001101100mmmmmoo0Szznnnnnttttt") +INST(ST3_sngl_2, "ST3 (single structure)", "0Q001101100mmmmmoo1Szznnnnnttttt") +INST(ST2_sngl_2, "ST2 (single structure)", "0Q001101101mmmmmoo0Szznnnnnttttt") +INST(ST4_sngl_2, "ST4 (single structure)", 
"0Q001101101mmmmmoo1Szznnnnnttttt") +INST(LD1_sngl_2, "LD1 (single structure)", "0Q001101110mmmmmoo0Szznnnnnttttt") +INST(LD3_sngl_2, "LD3 (single structure)", "0Q001101110mmmmmoo1Szznnnnnttttt") +INST(LD2_sngl_2, "LD2 (single structure)", "0Q001101111mmmmmoo0Szznnnnnttttt") +INST(LD4_sngl_2, "LD4 (single structure)", "0Q001101111mmmmmoo1Szznnnnnttttt") +INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt") +INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt") +INST(UnallocatedEncoding, "", "111110001-0----------1----------") +INST(LDTRSB, "LDTRSB", "00111000oo0iiiiiiiii10nnnnnttttt") +INST(LDTRSH, "LDTRSH", "01111000oo0iiiiiiiii10nnnnnttttt") +INST(STTR, "STTR", "zz111000000iiiiiiiii10nnnnnttttt") +INST(LDTR, "LDTR", "zz111000010iiiiiiiii10nnnnnttttt") +INST(MADD, "MADD", "z0011011000mmmmm0aaaaannnnnddddd") +INST(MSUB, "MSUB", "z0011011000mmmmm1aaaaannnnnddddd") +INST(XAR, "XAR", "11001110100mmmmmiiiiiinnnnnddddd") +INST(FCSEL_float, "FCSEL", "00011110yy1mmmmmcccc11nnnnnddddd") +INST(STx_mult_2, "STx (multiple structures)", "0Q001100100mmmmmoooozznnnnnttttt") +INST(LDx_mult_2, "LDx (multiple structures)", "0Q001100110mmmmmoooozznnnnnttttt") +INST(PRFM_imm, "PRFM (immediate)", "1111100110iiiiiiiiiiiinnnnnttttt") +INST(STUR_fpsimd, "STUR (SIMD&FP)", "zz111100o00iiiiiiiii00nnnnnttttt") +INST(LDUR_fpsimd, "LDUR (SIMD&FP)", "zz111100o10iiiiiiiii00nnnnnttttt") +INST(UnallocatedEncoding, "", "1111100111----------------------") +INST(UnallocatedEncoding, "", "1011100111----------------------") +INST(STRx_reg, "STRx (register)", "zz111000o01mmmmmxxxS10nnnnnttttt") +INST(LDRx_reg, "LDRx (register)", "zz111000o11mmmmmxxxS10nnnnnttttt") +INST(STR_reg_fpsimd, "STR (register, SIMD&FP)", "zz111100o01mmmmmxxxS10nnnnnttttt") +INST(LDR_reg_fpsimd, "LDR (register, SIMD&FP)", "zz111100o11mmmmmxxxS10nnnnnttttt") +INST(ADD_ext, "ADD (extended register)", "z0001011001mmmmmxxxiiinnnnnddddd") +INST(ADDS_ext, "ADDS (extended register)", "z0101011001mmmmmxxxiiinnnnnddddd") 
+INST(SUB_ext, "SUB (extended register)", "z1001011001mmmmmxxxiiinnnnnddddd") +INST(SUBS_ext, "SUBS (extended register)", "z1101011001mmmmmxxxiiinnnnnddddd") +INST(FCMLA_elt, "FCMLA (by element)", "0Q101111zzLMmmmm0rr1H0nnnnnddddd") +INST(FMADD_float, "FMADD", "00011111yy0mmmmm0aaaaannnnnddddd") +INST(FMSUB_float, "FMSUB", "00011111yy0mmmmm1aaaaannnnnddddd") +INST(FNMADD_float, "FNMADD", "00011111yy1mmmmm0aaaaannnnnddddd") +INST(FNMSUB_float, "FNMSUB", "00011111yy1mmmmm1aaaaannnnnddddd") +INST(EXTR, "EXTR", "z00100111N0mmmmmssssssnnnnnddddd") +INST(B_cond, "B.cond", "01010100iiiiiiiiiiiiiiiiiii0cccc") +INST(STURx_LDURx, "STURx/LDURx", "zz111000oo0iiiiiiiii00nnnnnttttt") +INST(STR_imm_fpsimd_1, "STR (immediate, SIMD&FP)", "zz111100o00iiiiiiiiip1nnnnnttttt") +INST(LDR_imm_fpsimd_1, "LDR (immediate, SIMD&FP)", "zz111100o10iiiiiiiiip1nnnnnttttt") +INST(AND_imm, "AND (immediate)", "z00100100Nrrrrrrssssssnnnnnddddd") +INST(ORR_imm, "ORR (immediate)", "z01100100Nrrrrrrssssssnnnnnddddd") +INST(EOR_imm, "EOR (immediate)", "z10100100Nrrrrrrssssssnnnnnddddd") +INST(ANDS_imm, "ANDS (immediate)", "z11100100Nrrrrrrssssssnnnnnddddd") +INST(MOVN, "MOVN", "z00100101ssiiiiiiiiiiiiiiiiddddd") +INST(MOVZ, "MOVZ", "z10100101ssiiiiiiiiiiiiiiiiddddd") +INST(MOVK, "MOVK", "z11100101ssiiiiiiiiiiiiiiiiddddd") +INST(SBFM, "SBFM", "z00100110Nrrrrrrssssssnnnnnddddd") +INST(BFM, "BFM", "z01100110Nrrrrrrssssssnnnnnddddd") +INST(UBFM, "UBFM", "z10100110Nrrrrrrssssssnnnnnddddd") +INST(LDRSW_lit, "LDRSW (literal)", "10011000iiiiiiiiiiiiiiiiiiittttt") +INST(PRFM_lit, "PRFM (literal)", "11011000iiiiiiiiiiiiiiiiiiittttt") +INST(STNP_LDNP_gen, "STNP/LDNP", "o01010000Liiiiiiiuuuuunnnnnttttt") +INST(STRx_LDRx_imm_1, "STRx/LDRx (immediate)", "zz111000oo0iiiiiiiiip1nnnnnttttt") +INST(AND_shift, "AND (shifted register)", "z0001010ss0mmmmmiiiiiinnnnnddddd") +INST(BIC_shift, "BIC (shifted register)", "z0001010ss1mmmmmiiiiiinnnnnddddd") +INST(ORR_shift, "ORR (shifted register)", 
"z0101010ss0mmmmmiiiiiinnnnnddddd") +INST(ORN_shift, "ORN (shifted register)", "z0101010ss1mmmmmiiiiiinnnnnddddd") +INST(EOR_shift, "EOR (shifted register)", "z1001010ss0mmmmmiiiiiinnnnnddddd") +INST(EON, "EON (shifted register)", "z1001010ss1mmmmmiiiiiinnnnnddddd") +INST(ANDS_shift, "ANDS (shifted register)", "z1101010ss0mmmmmiiiiiinnnnnddddd") +INST(BICS, "BICS (shifted register)", "z1101010ss1mmmmmiiiiiinnnnnddddd") +INST(ADD_shift, "ADD (shifted register)", "z0001011ss0mmmmmiiiiiinnnnnddddd") +INST(ADDS_shift, "ADDS (shifted register)", "z0101011ss0mmmmmiiiiiinnnnnddddd") +INST(SUB_shift, "SUB (shifted register)", "z1001011ss0mmmmmiiiiiinnnnnddddd") +INST(SUBS_shift, "SUBS (shifted register)", "z1101011ss0mmmmmiiiiiinnnnnddddd") +INST(ADD_imm, "ADD (immediate)", "z0010001ssiiiiiiiiiiiinnnnnddddd") +INST(ADDS_imm, "ADDS (immediate)", "z0110001ssiiiiiiiiiiiinnnnnddddd") +INST(SUB_imm, "SUB (immediate)", "z1010001ssiiiiiiiiiiiinnnnnddddd") +INST(SUBS_imm, "SUBS (immediate)", "z1110001ssiiiiiiiiiiiinnnnnddddd") +INST(CBZ, "CBZ", "z0110100iiiiiiiiiiiiiiiiiiittttt") +INST(CBNZ, "CBNZ", "z0110101iiiiiiiiiiiiiiiiiiittttt") +INST(TBZ, "TBZ", "b0110110bbbbbiiiiiiiiiiiiiittttt") +INST(TBNZ, "TBNZ", "b0110111bbbbbiiiiiiiiiiiiiittttt") +INST(LDR_lit_gen, "LDR (literal)", "0z011000iiiiiiiiiiiiiiiiiiittttt") +INST(STNP_LDNP_fpsimd, "STNP/LDNP (SIMD&FP)", "oo1011000Liiiiiiiuuuuunnnnnttttt") +INST(UnallocatedEncoding, "", "--1010000-----------------------") +INST(UnallocatedEncoding, "", "--1011000-----------------------") +INST(STR_imm_fpsimd_2, "STR (immediate, SIMD&FP)", "zz111101o0iiiiiiiiiiiinnnnnttttt") +INST(LDR_imm_fpsimd_2, "LDR (immediate, SIMD&FP)", "zz111101o1iiiiiiiiiiiinnnnnttttt") +INST(ADR, "ADR", "0ii10000iiiiiiiiiiiiiiiiiiiddddd") +INST(ADRP, "ADRP", "1ii10000iiiiiiiiiiiiiiiiiiiddddd") +INST(B_uncond, "B", "000101iiiiiiiiiiiiiiiiiiiiiiiiii") +INST(BL, "BL", "100101iiiiiiiiiiiiiiiiiiiiiiiiii") +INST(LDR_lit_fpsimd, "LDR (literal, SIMD&FP)", 
"oo011100iiiiiiiiiiiiiiiiiiittttt") +INST(STRx_LDRx_imm_2, "STRx/LDRx (immediate)", "zz111001ooiiiiiiiiiiiinnnnnttttt") +INST(STP_LDP_gen, "STP/LDP", "oo10100pwLiiiiiiiuuuuunnnnnttttt") +INST(STP_LDP_fpsimd, "STP/LDP (SIMD&FP)", "oo10110pwLiiiiiiiuuuuunnnnnttttt") diff --git a/src/dynarmic/src/dynarmic/ir/opcodes.inc b/src/dynarmic/src/dynarmic/ir/opcodes.inc index 6f57f278a3..b1ba5b2993 100644 --- a/src/dynarmic/src/dynarmic/ir/opcodes.inc +++ b/src/dynarmic/src/dynarmic/ir/opcodes.inc @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // First we list common shared opcodes // Since we give priority to A64 performance, we include them first, this is so we // can discard all A32 opcodes instead of having a "hole" in our checks @@ -710,6 +713,8 @@ A64OPC(ExclusiveWriteMemory32, U32, U64, A64OPC(ExclusiveWriteMemory64, U32, U64, U64, U64, AccType ) A64OPC(ExclusiveWriteMemory128, U32, U64, U64, U128, AccType ) +// Remember to update: +// - a32_emit_x64.cpp // A32 Context getters/setters A32OPC(SetCheckBit, Void, U1 ) diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 4d4484e53e..e85986ea5a 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -415,6 +415,105 @@ TEST_CASE("A64: URSHL", "[a64]") { CHECK(jit.GetVector(9) == Vector{0x0000000000000002, 0x12db8b8280e0ba}); } +TEST_CASE("A64: SQSHLU", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SQSHLU(V8.B16(), V0.B16(), 1); + code.SQSHLU(V9.H8(), V1.H8(), 2); + code.SQSHLU(V10.S4(), V2.S4(), 28); + code.SQSHLU(V11.D2(), V3.D2(), 4); + code.SQSHLU(V12.S4(), V0.S4(), 1); + code.SQSHLU(V13.S4(), V1.S4(), 3); + code.SQSHLU(V14.S4(), V2.S4(), 0); + code.SQSHLU(V15.S4(), V3.S4(), 0); + + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 
0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffffffffffff, 0x96dc5c140705cd04}); + + env.ticks_left = env.code_mem.size(); + CheckedRun([&]() { jit.Run(); }); + + CHECK(jit.GetVector(8) == Vector{0x3000d4d4, 0xfe0000000076009e}); + CHECK(jit.GetVector(9) == Vector{0x2c0000003c, 0}); + CHECK(jit.GetVector(10) == Vector{0x10000000'ffffffff, 0xffffffff'ffffffff}); + CHECK(jit.GetVector(11) == Vector{0, 0}); + CHECK(jit.GetVector(12) == Vector{0x3174d4d4, 0xfffffffe00000000}); + CHECK(jit.GetVector(13) == Vector{0x5800000078, 0}); + CHECK(jit.GetVector(14) == Vector{0x1000000ff, 0x100000007f}); + CHECK(jit.GetVector(15) == Vector{0, 0x705cd04}); +} + +TEST_CASE("A64: SMIN", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SMIN(V8.B16(), V0.B16(), V3.B16()); + code.SMIN(V9.H8(), V1.H8(), V2.H8()); + code.SMIN(V10.S4(), V2.S4(), V3.S4()); + code.SMIN(V11.S4(), V3.S4(), V3.S4()); + code.SMIN(V12.S4(), V0.S4(), V3.S4()); + code.SMIN(V13.S4(), V1.S4(), V2.S4()); + code.SMIN(V14.S4(), V2.S4(), V1.S4()); + code.SMIN(V15.S4(), V3.S4(), V0.S4()); + + jit.SetPC(0); + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffff'ffffffff, 0x96dc5c14'0705cd04}); + + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + + REQUIRE(jit.GetVector(8) == Vector{0xffffffffffbaffff, 0x96dcffff94059504}); + REQUIRE(jit.GetVector(9) == Vector{0x10000000f, 0xffffffffffffffff}); + REQUIRE(jit.GetVector(10) == Vector{0xffffffffffffffff, 0x96dc5c140000007f}); +} + +TEST_CASE("A64: SMINP", "[a64]") { + A64TestEnv 
env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.SMINP(V8.B16(), V0.B16(), V3.B16()); + code.SMINP(V9.H8(), V1.H8(), V2.H8()); + code.SMINP(V10.S4(), V2.S4(), V1.S4()); + code.SMINP(V11.S4(), V3.S4(), V3.S4()); + code.SMINP(V12.S4(), V0.S4(), V3.S4()); + code.SMINP(V13.S4(), V1.S4(), V2.S4()); + code.SMINP(V14.S4(), V2.S4(), V1.S4()); + code.SMINP(V15.S4(), V3.S4(), V0.S4()); + + jit.SetPC(0); + jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f}); + jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff}); + jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f}); + jit.SetVector(3, Vector{0xffffffff'ffffffff, 0x96dc5c14'0705cd04}); + + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + + REQUIRE(jit.GetVector(8) == Vector{0xffff9495ffffba6a, 0x961405cdffffffff}); + REQUIRE(jit.GetVector(9) == Vector{0xffffffff00000000, 0}); + REQUIRE(jit.GetVector(10) == Vector{0x1000000001, 0xffffffff0000000b}); + REQUIRE(jit.GetVector(11) == Vector{0x96dc5c14ffffffff, 0x96dc5c14ffffffff}); + REQUIRE(jit.GetVector(12) == Vector{0x943b954fffffffff, 0x96dc5c14ffffffff}); + REQUIRE(jit.GetVector(13) == Vector{0xffffffff0000000b, 0x1000000001}); + REQUIRE(jit.GetVector(14) == Vector{0x1000000001, 0xffffffff0000000b}); + REQUIRE(jit.GetVector(15) == Vector{0x96dc5c14ffffffff, 0x943b954fffffffff}); +} + TEST_CASE("A64: XTN", "[a64]") { A64TestEnv env; A64::UserConfig jit_user_config{}; diff --git a/src/frontend_common/CMakeLists.txt b/src/frontend_common/CMakeLists.txt index a6d580cb18..f9b3805939 100644 --- a/src/frontend_common/CMakeLists.txt +++ b/src/frontend_common/CMakeLists.txt @@ -22,8 +22,6 @@ if (ENABLE_UPDATE_CHECKER) target_sources(frontend_common PRIVATE update_checker.cpp update_checker.h) - - target_compile_definitions(frontend_common PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT) 
target_link_libraries(frontend_common PRIVATE OpenSSL::SSL OpenSSL::Crypto) endif() diff --git a/src/frontend_common/settings_generator.cpp b/src/frontend_common/settings_generator.cpp index 46625656b1..2b1a0f35d7 100644 --- a/src/frontend_common/settings_generator.cpp +++ b/src/frontend_common/settings_generator.cpp @@ -4,22 +4,21 @@ #include #include #include "common/settings.h" +#include "common/random.h" #include "settings_generator.h" namespace FrontendCommon { void GenerateSettings() { - static std::random_device rd; - + auto gen = Common::Random::GetMT19937(); // Web Token if (Settings::values.eden_token.GetValue().empty()) { static constexpr const size_t token_length = 48; static constexpr const frozen::string token_set = "abcdefghijklmnopqrstuvwxyz"; static std::uniform_int_distribution token_dist(0, token_set.size() - 1); std::string result; - for (size_t i = 0; i < token_length; ++i) { - size_t idx = token_dist(rd); + size_t idx = token_dist(gen); result += token_set[idx]; } Settings::values.eden_token.SetValue(result); @@ -27,8 +26,6 @@ void GenerateSettings() { // Randomly generated number because, well, we fill the rest automagically ;) // Other serial parts are filled by Region_Index - std::random_device device; - std::mt19937 gen(device()); std::uniform_int_distribution distribution(1, (std::numeric_limits::max)()); if (Settings::values.serial_unit.GetValue() == 0) Settings::values.serial_unit.SetValue(distribution(gen)); diff --git a/src/frontend_common/update_checker.cpp b/src/frontend_common/update_checker.cpp index 75b1dc00cc..3e5a832ba0 100644 --- a/src/frontend_common/update_checker.cpp +++ b/src/frontend_common/update_checker.cpp @@ -13,7 +13,7 @@ #include "common/scm_rev.h" #include "update_checker.h" -#include +#include "common/httplib.h" #ifdef YUZU_BUNDLED_OPENSSL #include diff --git a/src/hid_core/frontend/input_converter.cpp b/src/hid_core/frontend/input_converter.cpp index f245a3f769..0709f5c930 100644 --- 
a/src/hid_core/frontend/input_converter.cpp +++ b/src/hid_core/frontend/input_converter.cpp @@ -1,9 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include +#include "common/random.h" #include "common/input.h" #include "hid_core/frontend/input_converter.h" @@ -119,15 +123,14 @@ Common::Input::MotionStatus TransformToMotion(const Common::Input::CallbackStatu .properties = properties, }; if (TransformToButton(callback).value) { - std::random_device device; - std::mt19937 gen(device()); std::uniform_int_distribution distribution(-5000, 5000); - status.accel.x.raw_value = static_cast(distribution(gen)) * 0.001f; - status.accel.y.raw_value = static_cast(distribution(gen)) * 0.001f; - status.accel.z.raw_value = static_cast(distribution(gen)) * 0.001f; - status.gyro.x.raw_value = static_cast(distribution(gen)) * 0.001f; - status.gyro.y.raw_value = static_cast(distribution(gen)) * 0.001f; - status.gyro.z.raw_value = static_cast(distribution(gen)) * 0.001f; + auto gen = Common::Random::GetMT19937(); + status.accel.x.raw_value = f32(distribution(gen)) * 0.001f; + status.accel.y.raw_value = f32(distribution(gen)) * 0.001f; + status.accel.z.raw_value = f32(distribution(gen)) * 0.001f; + status.gyro.x.raw_value = f32(distribution(gen)) * 0.001f; + status.gyro.y.raw_value = f32(distribution(gen)) * 0.001f; + status.gyro.z.raw_value = f32(distribution(gen)) * 0.001f; } break; } diff --git a/src/input_common/drivers/udp_client.cpp b/src/input_common/drivers/udp_client.cpp index c930e19de3..fc216e3c9f 100644 --- a/src/input_common/drivers/udp_client.cpp +++ b/src/input_common/drivers/udp_client.cpp @@ -11,6 +11,7 @@ #include "common/logging.h" #include "common/param_package.h" +#include "common/random.h" #include "common/settings.h" #include "input_common/drivers/udp_client.h" #include 
"input_common/helpers/udp_protocol.h" @@ -31,7 +32,7 @@ public: explicit Socket(const std::string& host, u16 port, SocketCallback callback_) : callback(std::move(callback_)), timer(io_context), - socket(io_context, udp::endpoint(udp::v4(), 0)), client_id(GenerateRandomClientId()) { + socket(io_context, udp::endpoint(udp::v4(), 0)), client_id(Common::Random::Random32(0)) { boost::system::error_code ec{}; auto ipv4 = boost::asio::ip::make_address_v4(host, ec); if (ec.value() != boost::system::errc::success) { @@ -64,11 +65,6 @@ public: } private: - u32 GenerateRandomClientId() const { - std::random_device device; - return device(); - } - void HandleReceive(const boost::system::error_code&, std::size_t bytes_transferred) { if (auto type = Response::Validate(receive_buffer.data(), bytes_transferred)) { switch (*type) { diff --git a/src/network/room.cpp b/src/network/room.cpp index 7c257d2bd4..2069673bae 100644 --- a/src/network/room.cpp +++ b/src/network/room.cpp @@ -23,8 +23,6 @@ namespace Network { class Room::RoomImpl { public: - std::mt19937 random_gen; ///< Random number generator. Used for GenerateFakeIPAddress - ENetHost* server = nullptr; ///< Network interface. std::atomic state{State::Closed}; ///< Current state of the room. 
@@ -51,7 +49,7 @@ public: IPBanList ip_ban_list; ///< List of banned IP addresses mutable std::mutex ban_list_mutex; ///< Mutex for the ban lists - RoomImpl() : random_gen(std::random_device()()) {} + RoomImpl() {} /// Thread that receives and dispatches network packets std::optional room_thread; diff --git a/src/qt_common/CMakeLists.txt b/src/qt_common/CMakeLists.txt index 904b03d288..399fbe67a0 100644 --- a/src/qt_common/CMakeLists.txt +++ b/src/qt_common/CMakeLists.txt @@ -50,7 +50,6 @@ if (USE_DISCORD_PRESENCE) if (YUZU_USE_BUNDLED_OPENSSL) target_link_libraries(qt_common PUBLIC OpenSSL::SSL OpenSSL::Crypto) - target_compile_definitions(qt_common PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT) endif() target_compile_definitions(qt_common PUBLIC USE_DISCORD_PRESENCE) diff --git a/src/qt_common/discord/discord_impl.cpp b/src/qt_common/discord/discord_impl.cpp index 37b24cdd57..c1cb5319dc 100644 --- a/src/qt_common/discord/discord_impl.cpp +++ b/src/qt_common/discord/discord_impl.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include "common/httplib.h" #include #include diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 668f939546..60b399ccba 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -118,8 +118,6 @@ if (NOT GLSLANG_ERROR STREQUAL "") set(QUIET_FLAG "") endif() -# Shader files must depend on their directory otherwise *BSD make will spontaneously combust -file(MAKE_DIRECTORY "${SHADER_DIR}") foreach(SOURCE_FILE IN ITEMS ${SHADER_FILES}) get_filename_component(FILENAME ${SOURCE_FILE} NAME) string(REPLACE "." 
"_" SHADER_NAME ${FILENAME}) @@ -135,7 +133,6 @@ foreach(SOURCE_FILE IN ITEMS ${SHADER_FILES}) ${SOURCE_FILE} DEPENDS ${INPUT_FILE} - ${SHADER_DIR} # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified ) set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) @@ -151,8 +148,6 @@ foreach(SOURCE_FILE IN ITEMS ${SHADER_FILES}) ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} --target-env ${SPIR_V_VERSION} MAIN_DEPENDENCY ${SOURCE_FILE} - DEPENDS - ${SHADER_DIR} ) set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) endif() @@ -172,7 +167,6 @@ foreach(FILEPATH IN ITEMS ${FIDELITYFX_FILES}) ${SOURCE_FILE} DEPENDS ${INPUT_FILE} - ${SHADER_DIR} # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified ) set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e989bf6b31..75fbcaa968 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -49,7 +49,7 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; -constexpr size_t MAX_IMAGE_ELEMENTS = 64; +constexpr size_t INLINE_IMAGE_ELEMENTS = 64; DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { DescriptorLayoutBuilder builder{device}; @@ -264,7 +264,11 @@ GraphicsPipeline::GraphicsPipeline( stage_infos[stage] = *info; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + num_image_elements += Shader::NumDescriptors(info->texture_buffer_descriptors); + num_image_elements += 
Shader::NumDescriptors(info->image_buffer_descriptors); num_textures += Shader::NumDescriptors(info->texture_descriptors); + num_image_elements += Shader::NumDescriptors(info->texture_descriptors); + num_image_elements += Shader::NumDescriptors(info->image_descriptors); } fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0]; auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { @@ -310,10 +314,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { - std::array views; - std::array samplers; - size_t sampler_index{}; - size_t view_index{}; + small_vector views; + small_vector samplers; + views.reserve(num_image_elements); + samplers.reserve(num_textures); texture_cache.SynchronizeGraphicsDescriptors(); @@ -358,11 +362,11 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - views[view_index++] = { + views.push_back({ .index = handle.first, .blacklist = blacklist, .id = {} - }; + }); } }}; if constexpr (Spec::has_texture_buffers) { @@ -378,10 +382,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - views[view_index++] = {handle.first}; + views.push_back({handle.first}); VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; - samplers[sampler_index++] = sampler; + samplers.push_back(sampler); } } if constexpr (Spec::has_images) { @@ -407,7 +411,9 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { config_stage(4); } - texture_cache.FillGraphicsImageViews(std::span(views.data(), view_index)); + 
ASSERT(views.size() == num_image_elements); + ASSERT(samplers.size() == num_textures); + texture_cache.FillGraphicsImageViews(std::span(views.data(), views.size())); VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { @@ -501,7 +507,8 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.any_buffer_uploaded = false; } texture_cache.UpdateRenderTargets(false); - texture_cache.CheckFeedbackLoop(views); + texture_cache.CheckFeedbackLoop(std::span{views.data(), + views.size()}); ConfigureDraw(rescaling, render_area); return true; @@ -987,7 +994,7 @@ void GraphicsPipeline::Validate() { num_images += Shader::NumDescriptors(info.texture_descriptors); num_images += Shader::NumDescriptors(info.image_descriptors); } - ASSERT(num_images <= MAX_IMAGE_ELEMENTS); + ASSERT(num_images == num_image_elements); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 34941d6e8d..1a41e50a36 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -159,6 +159,7 @@ private: std::array stage_infos; std::array enabled_uniform_buffer_masks{}; VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; + size_t num_image_elements{}; u32 num_textures{}; bool fragment_has_color0_output{}; diff --git a/src/web_service/CMakeLists.txt b/src/web_service/CMakeLists.txt index 0dedad16f7..d4debb3c09 100644 --- a/src/web_service/CMakeLists.txt +++ b/src/web_service/CMakeLists.txt @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +# SPDX-License-Identifier: 
GPL-3.0-or-later + # SPDX-FileCopyrightText: 2018 yuzu Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later @@ -19,4 +22,3 @@ target_link_libraries(web_service PRIVATE common network nlohmann_json::nlohmann find_package(OpenSSL REQUIRED) target_link_libraries(web_service PRIVATE OpenSSL::SSL OpenSSL::Crypto) -target_compile_definitions(web_service PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT) diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index 60b11fff5a..7fc2ae69b1 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -9,17 +9,7 @@ #include #include - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#ifndef __clang__ -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif -#endif -#include -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif +#include "common/httplib.h" #ifdef YUZU_BUNDLED_OPENSSL #include diff --git a/tools/gendynarm.cpp b/tools/gendynarm.cpp new file mode 100644 index 0000000000..3d1588e7e8 --- /dev/null +++ b/tools/gendynarm.cpp @@ -0,0 +1,1646 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace mcl { +template +constexpr std::size_t bitsizeof = CHAR_BIT * sizeof(T); +} +namespace mcl::bit { +template +inline size_t count_ones(T x) { + return std::bitset>(x).count(); +} +} +template +inline consteval std::array StringToArray(const char (&str)[N + 1]) { + std::array result{}; + for (size_t i = 0; i < N; i++) { + result[i] = str[i]; + } + return result; +} +using opcode_type = uint32_t; +constexpr size_t opcode_bitsize = mcl::bitsizeof; +static opcode_type GetMaskAndExpect(std::string bitstring) { + const auto one = opcode_type(1); + opcode_type mask = 0; + for (size_t i = 0; i < opcode_bitsize; i++) { + const size_t bit_position = opcode_bitsize - i - 1; + switch (bitstring[i]) { + case 
'0': mask |= one << bit_position; break; + case '1': mask |= one << bit_position; break; + default: break; + } + } + return mask; +} +struct SortingInfo { + const char *first; + uint32_t second; + const char *inst_final; +}; +int main(int argc, char *argv[]) { + auto which = std::string{argv[1]}; + std::printf( +"// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project\n" +"// SPDX-License-Identifier: GPL-3.0-or-later\n" +"// DO NOT REORDER\n"); + if (which == "-arm") { + std::vector list = { +#define INST(fn, name, bitstring) { name, GetMaskAndExpect(bitstring), "INST(" #fn ", " #name ", " #bitstring ")" }, +// Barrier instructions +INST(arm_DMB, "DMB", "1111010101111111111100000101oooo") // v7 +INST(arm_DSB, "DSB", "1111010101111111111100000100oooo") // v7 +INST(arm_ISB, "ISB", "1111010101111111111100000110oooo") // v7 + +// Branch instructions +INST(arm_BLX_imm, "BLX (imm)", "1111101hvvvvvvvvvvvvvvvvvvvvvvvv") // v5 +INST(arm_BLX_reg, "BLX (reg)", "cccc000100101111111111110011mmmm") // v5 +INST(arm_B, "B", "cccc1010vvvvvvvvvvvvvvvvvvvvvvvv") // v1 +INST(arm_BL, "BL", "cccc1011vvvvvvvvvvvvvvvvvvvvvvvv") // v1 +INST(arm_BX, "BX", "cccc000100101111111111110001mmmm") // v4T +INST(arm_BXJ, "BXJ", "cccc000100101111111111110010mmmm") // v5J + +// CRC32 instructions +INST(arm_CRC32, "CRC32", "cccc00010zz0nnnndddd00000100mmmm") // v8 +INST(arm_CRC32C, "CRC32C", "cccc00010zz0nnnndddd00100100mmmm") // v8 + +// Coprocessor instructions +INST(arm_CDP, "CDP", "cccc1110ooooNNNNDDDDppppooo0MMMM") // v2 (CDP2: v5) +INST(arm_LDC, "LDC", "cccc110pudw1nnnnDDDDppppvvvvvvvv") // v2 (LDC2: v5) +INST(arm_MCR, "MCR", "cccc1110ooo0NNNNttttppppooo1MMMM") // v2 (MCR2: v5) +INST(arm_MCRR, "MCRR", "cccc11000100uuuuttttppppooooMMMM") // v5E (MCRR2: v6) +INST(arm_MRC, "MRC", "cccc1110ooo1NNNNttttppppooo1MMMM") // v2 (MRC2: v5) +INST(arm_MRRC, "MRRC", "cccc11000101uuuuttttppppooooMMMM") // v5E (MRRC2: v6) +INST(arm_STC, "STC", "cccc110pudw0nnnnDDDDppppvvvvvvvv") // v2 (STC2: v5) + +// 
Data Processing instructions +INST(arm_ADC_imm, "ADC (imm)", "cccc0010101Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_ADC_reg, "ADC (reg)", "cccc0000101Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_ADC_rsr, "ADC (rsr)", "cccc0000101Snnnnddddssss0rr1mmmm") // v1 +INST(arm_ADD_imm, "ADD (imm)", "cccc0010100Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_ADD_reg, "ADD (reg)", "cccc0000100Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_ADD_rsr, "ADD (rsr)", "cccc0000100Snnnnddddssss0rr1mmmm") // v1 +INST(arm_AND_imm, "AND (imm)", "cccc0010000Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_AND_reg, "AND (reg)", "cccc0000000Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_AND_rsr, "AND (rsr)", "cccc0000000Snnnnddddssss0rr1mmmm") // v1 +INST(arm_BIC_imm, "BIC (imm)", "cccc0011110Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_BIC_reg, "BIC (reg)", "cccc0001110Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_BIC_rsr, "BIC (rsr)", "cccc0001110Snnnnddddssss0rr1mmmm") // v1 +INST(arm_CMN_imm, "CMN (imm)", "cccc00110111nnnn0000rrrrvvvvvvvv") // v1 +INST(arm_CMN_reg, "CMN (reg)", "cccc00010111nnnn0000vvvvvrr0mmmm") // v1 +INST(arm_CMN_rsr, "CMN (rsr)", "cccc00010111nnnn0000ssss0rr1mmmm") // v1 +INST(arm_CMP_imm, "CMP (imm)", "cccc00110101nnnn0000rrrrvvvvvvvv") // v1 +INST(arm_CMP_reg, "CMP (reg)", "cccc00010101nnnn0000vvvvvrr0mmmm") // v1 +INST(arm_CMP_rsr, "CMP (rsr)", "cccc00010101nnnn0000ssss0rr1mmmm") // v1 +INST(arm_EOR_imm, "EOR (imm)", "cccc0010001Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_EOR_reg, "EOR (reg)", "cccc0000001Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_EOR_rsr, "EOR (rsr)", "cccc0000001Snnnnddddssss0rr1mmmm") // v1 +INST(arm_MOV_imm, "MOV (imm)", "cccc0011101S0000ddddrrrrvvvvvvvv") // v1 +INST(arm_MOV_reg, "MOV (reg)", "cccc0001101S0000ddddvvvvvrr0mmmm") // v1 +INST(arm_MOV_rsr, "MOV (rsr)", "cccc0001101S0000ddddssss0rr1mmmm") // v1 +INST(arm_MVN_imm, "MVN (imm)", "cccc0011111S0000ddddrrrrvvvvvvvv") // v1 +INST(arm_MVN_reg, "MVN (reg)", "cccc0001111S0000ddddvvvvvrr0mmmm") // v1 +INST(arm_MVN_rsr, "MVN (rsr)", 
"cccc0001111S0000ddddssss0rr1mmmm") // v1 +INST(arm_ORR_imm, "ORR (imm)", "cccc0011100Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_ORR_reg, "ORR (reg)", "cccc0001100Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_ORR_rsr, "ORR (rsr)", "cccc0001100Snnnnddddssss0rr1mmmm") // v1 +INST(arm_RSB_imm, "RSB (imm)", "cccc0010011Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_RSB_reg, "RSB (reg)", "cccc0000011Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_RSB_rsr, "RSB (rsr)", "cccc0000011Snnnnddddssss0rr1mmmm") // v1 +INST(arm_RSC_imm, "RSC (imm)", "cccc0010111Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_RSC_reg, "RSC (reg)", "cccc0000111Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_RSC_rsr, "RSC (rsr)", "cccc0000111Snnnnddddssss0rr1mmmm") // v1 +INST(arm_SBC_imm, "SBC (imm)", "cccc0010110Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_SBC_reg, "SBC (reg)", "cccc0000110Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_SBC_rsr, "SBC (rsr)", "cccc0000110Snnnnddddssss0rr1mmmm") // v1 +INST(arm_SUB_imm, "SUB (imm)", "cccc0010010Snnnnddddrrrrvvvvvvvv") // v1 +INST(arm_SUB_reg, "SUB (reg)", "cccc0000010Snnnnddddvvvvvrr0mmmm") // v1 +INST(arm_SUB_rsr, "SUB (rsr)", "cccc0000010Snnnnddddssss0rr1mmmm") // v1 +INST(arm_TEQ_imm, "TEQ (imm)", "cccc00110011nnnn0000rrrrvvvvvvvv") // v1 +INST(arm_TEQ_reg, "TEQ (reg)", "cccc00010011nnnn0000vvvvvrr0mmmm") // v1 +INST(arm_TEQ_rsr, "TEQ (rsr)", "cccc00010011nnnn0000ssss0rr1mmmm") // v1 +INST(arm_TST_imm, "TST (imm)", "cccc00110001nnnn0000rrrrvvvvvvvv") // v1 +INST(arm_TST_reg, "TST (reg)", "cccc00010001nnnn0000vvvvvrr0mmmm") // v1 +INST(arm_TST_rsr, "TST (rsr)", "cccc00010001nnnn0000ssss0rr1mmmm") // v1 + +// Exception Generating instructions +INST(arm_BKPT, "BKPT", "cccc00010010vvvvvvvvvvvv0111vvvv") // v5 +INST(arm_SVC, "SVC", "cccc1111vvvvvvvvvvvvvvvvvvvvvvvv") // v1 +INST(arm_UDF, "UDF", "111001111111------------1111----") + +// Extension instructions +INST(arm_SXTB, "SXTB", "cccc011010101111ddddrr000111mmmm") // v6 +INST(arm_SXTB16, "SXTB16", "cccc011010001111ddddrr000111mmmm") // v6 
+INST(arm_SXTH, "SXTH", "cccc011010111111ddddrr000111mmmm") // v6 +INST(arm_SXTAB, "SXTAB", "cccc01101010nnnnddddrr000111mmmm") // v6 +INST(arm_SXTAB16, "SXTAB16", "cccc01101000nnnnddddrr000111mmmm") // v6 +INST(arm_SXTAH, "SXTAH", "cccc01101011nnnnddddrr000111mmmm") // v6 +INST(arm_UXTB, "UXTB", "cccc011011101111ddddrr000111mmmm") // v6 +INST(arm_UXTB16, "UXTB16", "cccc011011001111ddddrr000111mmmm") // v6 +INST(arm_UXTH, "UXTH", "cccc011011111111ddddrr000111mmmm") // v6 +INST(arm_UXTAB, "UXTAB", "cccc01101110nnnnddddrr000111mmmm") // v6 +INST(arm_UXTAB16, "UXTAB16", "cccc01101100nnnnddddrr000111mmmm") // v6 +INST(arm_UXTAH, "UXTAH", "cccc01101111nnnnddddrr000111mmmm") // v6 + +// Hint instructions +INST(arm_PLD_imm, "PLD (imm)", "11110101uz01nnnn1111iiiiiiiiiiii") // v5E for PLD; v7 for PLDW +INST(arm_PLD_reg, "PLD (reg)", "11110111uz01nnnn1111iiiiitt0mmmm") // v5E for PLD; v7 for PLDW +INST(arm_SEV, "SEV", "----0011001000001111000000000100") // v6K +INST(arm_SEVL, "SEVL", "----0011001000001111000000000101") // v8 +INST(arm_WFE, "WFE", "----0011001000001111000000000010") // v6K +INST(arm_WFI, "WFI", "----0011001000001111000000000011") // v6K +INST(arm_YIELD, "YIELD", "----0011001000001111000000000001") // v6K +INST(arm_NOP, "Reserved Hint", "----0011001000001111------------") +INST(arm_NOP, "Reserved Hint", "----001100100000111100000000----") + +// Synchronization Primitive instructions +INST(arm_CLREX, "CLREX", "11110101011111111111000000011111") // v6K +INST(arm_SWP, "SWP", "cccc00010000nnnntttt00001001uuuu") // v2S (v6: Deprecated) +INST(arm_SWPB, "SWPB", "cccc00010100nnnntttt00001001uuuu") // v2S (v6: Deprecated) +INST(arm_STL, "STL", "cccc00011000nnnn111111001001tttt") // v8 +INST(arm_STLEX, "STLEX", "cccc00011000nnnndddd11101001tttt") // v8 +INST(arm_STREX, "STREX", "cccc00011000nnnndddd11111001mmmm") // v6 +INST(arm_LDA, "LDA", "cccc00011001nnnndddd110010011111") // v8 +INST(arm_LDAEX, "LDAEX", "cccc00011001nnnndddd111010011111") // v8 +INST(arm_LDREX, 
"LDREX", "cccc00011001nnnndddd111110011111") // v6 +INST(arm_STLEXD, "STLEXD", "cccc00011010nnnndddd11101001mmmm") // v8 +INST(arm_STREXD, "STREXD", "cccc00011010nnnndddd11111001mmmm") // v6K +INST(arm_LDAEXD, "LDAEXD", "cccc00011011nnnndddd111010011111") // v8 +INST(arm_LDREXD, "LDREXD", "cccc00011011nnnndddd111110011111") // v6K +INST(arm_STLB, "STLB", "cccc00011100nnnn111111001001tttt") // v8 +INST(arm_STLEXB, "STLEXB", "cccc00011100nnnndddd11101001mmmm") // v8 +INST(arm_STREXB, "STREXB", "cccc00011100nnnndddd11111001mmmm") // v6K +INST(arm_LDAB, "LDAB", "cccc00011101nnnndddd110010011111") // v8 +INST(arm_LDAEXB, "LDAEXB", "cccc00011101nnnndddd111010011111") // v8 +INST(arm_LDREXB, "LDREXB", "cccc00011101nnnndddd111110011111") // v6K +INST(arm_STLH, "STLH", "cccc00011110nnnn111111001001mmmm") // v8 +INST(arm_STLEXH, "STLEXH", "cccc00011110nnnndddd11101001mmmm") // v8 +INST(arm_STREXH, "STREXH", "cccc00011110nnnndddd11111001mmmm") // v6K +INST(arm_LDAH, "LDAH", "cccc00011111nnnndddd110010011111") // v8 +INST(arm_LDAEXH, "LDAEXH", "cccc00011111nnnndddd111010011111") // v8 +INST(arm_LDREXH, "LDREXH", "cccc00011111nnnndddd111110011111") // v6K + +// Load/Store instructions +INST(arm_LDRBT, "LDRBT (A1)", "----0100-111--------------------") // v1 +INST(arm_LDRBT, "LDRBT (A2)", "----0110-111---------------0----") // v1 +INST(arm_LDRHT, "LDRHT (A1)", "----0000-111------------1011----") // v6T2 +INST(arm_LDRHT, "LDRHT (A1)", "----0000-1111111--------1011----") // v6T2 +INST(arm_LDRHT, "LDRHT (A2)", "----0000-011--------00001011----") // v6T2 +INST(arm_LDRSBT, "LDRSBT (A1)", "----0000-111------------1101----") // v6T2 +INST(arm_LDRSBT, "LDRSBT (A2)", "----0000-011--------00001101----") // v6T2 +INST(arm_LDRSHT, "LDRSHT (A1)", "----0000-111------------1111----") // v6T2 +INST(arm_LDRSHT, "LDRSHT (A2)", "----0000-011--------00001111----") // v6T2 +INST(arm_LDRT, "LDRT (A1)", "----0100-011--------------------") // v1 +INST(arm_LDRT, "LDRT (A2)", 
"----0110-011---------------0----") // v1 +INST(arm_STRBT, "STRBT (A1)", "----0100-110--------------------") // v1 +INST(arm_STRBT, "STRBT (A2)", "----0110-110---------------0----") // v1 +INST(arm_STRHT, "STRHT (A1)", "----0000-110------------1011----") // v6T2 +INST(arm_STRHT, "STRHT (A2)", "----0000-010--------00001011----") // v6T2 +INST(arm_STRT, "STRT (A1)", "----0100-010--------------------") // v1 +INST(arm_STRT, "STRT (A2)", "----0110-010---------------0----") // v1 +INST(arm_LDR_lit, "LDR (lit)", "cccc0101u0011111ttttvvvvvvvvvvvv") // v1 +INST(arm_LDR_imm, "LDR (imm)", "cccc010pu0w1nnnnttttvvvvvvvvvvvv") // v1 +INST(arm_LDR_reg, "LDR (reg)", "cccc011pu0w1nnnnttttvvvvvrr0mmmm") // v1 +INST(arm_LDRB_lit, "LDRB (lit)", "cccc0101u1011111ttttvvvvvvvvvvvv") // v1 +INST(arm_LDRB_imm, "LDRB (imm)", "cccc010pu1w1nnnnttttvvvvvvvvvvvv") // v1 +INST(arm_LDRB_reg, "LDRB (reg)", "cccc011pu1w1nnnnttttvvvvvrr0mmmm") // v1 +INST(arm_LDRD_lit, "LDRD (lit)", "cccc0001u1001111ttttvvvv1101vvvv") // v5E +INST(arm_LDRD_imm, "LDRD (imm)", "cccc000pu1w0nnnnttttvvvv1101vvvv") // v5E +INST(arm_LDRD_reg, "LDRD (reg)", "cccc000pu0w0nnnntttt00001101mmmm") // v5E +INST(arm_LDRH_lit, "LDRH (lit)", "cccc000pu1w11111ttttvvvv1011vvvv") // v4 +INST(arm_LDRH_imm, "LDRH (imm)", "cccc000pu1w1nnnnttttvvvv1011vvvv") // v4 +INST(arm_LDRH_reg, "LDRH (reg)", "cccc000pu0w1nnnntttt00001011mmmm") // v4 +INST(arm_LDRSB_lit, "LDRSB (lit)", "cccc0001u1011111ttttvvvv1101vvvv") // v4 +INST(arm_LDRSB_imm, "LDRSB (imm)", "cccc000pu1w1nnnnttttvvvv1101vvvv") // v4 +INST(arm_LDRSB_reg, "LDRSB (reg)", "cccc000pu0w1nnnntttt00001101mmmm") // v4 +INST(arm_LDRSH_lit, "LDRSH (lit)", "cccc0001u1011111ttttvvvv1111vvvv") // v4 +INST(arm_LDRSH_imm, "LDRSH (imm)", "cccc000pu1w1nnnnttttvvvv1111vvvv") // v4 +INST(arm_LDRSH_reg, "LDRSH (reg)", "cccc000pu0w1nnnntttt00001111mmmm") // v4 +INST(arm_STR_imm, "STR (imm)", "cccc010pu0w0nnnnttttvvvvvvvvvvvv") // v1 +INST(arm_STR_reg, "STR (reg)", "cccc011pu0w0nnnnttttvvvvvrr0mmmm") 
// v1 +INST(arm_STRB_imm, "STRB (imm)", "cccc010pu1w0nnnnttttvvvvvvvvvvvv") // v1 +INST(arm_STRB_reg, "STRB (reg)", "cccc011pu1w0nnnnttttvvvvvrr0mmmm") // v1 +INST(arm_STRD_imm, "STRD (imm)", "cccc000pu1w0nnnnttttvvvv1111vvvv") // v5E +INST(arm_STRD_reg, "STRD (reg)", "cccc000pu0w0nnnntttt00001111mmmm") // v5E +INST(arm_STRH_imm, "STRH (imm)", "cccc000pu1w0nnnnttttvvvv1011vvvv") // v4 +INST(arm_STRH_reg, "STRH (reg)", "cccc000pu0w0nnnntttt00001011mmmm") // v4 + +// Load/Store Multiple instructions +INST(arm_LDM, "LDM", "cccc100010w1nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_LDMDA, "LDMDA", "cccc100000w1nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_LDMDB, "LDMDB", "cccc100100w1nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_LDMIB, "LDMIB", "cccc100110w1nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_LDM_usr, "LDM (usr reg)", "----100--101--------------------") // v1 +INST(arm_LDM_eret, "LDM (exce ret)", "----100--1-1----1---------------") // v1 +INST(arm_STM, "STM", "cccc100010w0nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_STMDA, "STMDA", "cccc100000w0nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_STMDB, "STMDB", "cccc100100w0nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_STMIB, "STMIB", "cccc100110w0nnnnxxxxxxxxxxxxxxxx") // v1 +INST(arm_STM_usr, "STM (usr reg)", "----100--100--------------------") // v1 + +// Miscellaneous instructions +INST(arm_BFC, "BFC", "cccc0111110vvvvvddddvvvvv0011111") // v6T2 +INST(arm_BFI, "BFI", "cccc0111110vvvvvddddvvvvv001nnnn") // v6T2 +INST(arm_CLZ, "CLZ", "cccc000101101111dddd11110001mmmm") // v5 +INST(arm_MOVT, "MOVT", "cccc00110100vvvvddddvvvvvvvvvvvv") // v6T2 +INST(arm_MOVW, "MOVW", "cccc00110000vvvvddddvvvvvvvvvvvv") // v6T2 +INST(arm_NOP, "NOP", "----0011001000001111000000000000") // v6K +INST(arm_SBFX, "SBFX", "cccc0111101wwwwwddddvvvvv101nnnn") // v6T2 +INST(arm_SEL, "SEL", "cccc01101000nnnndddd11111011mmmm") // v6 +INST(arm_UBFX, "UBFX", "cccc0111111wwwwwddddvvvvv101nnnn") // v6T2 + +// Unsigned Sum of Absolute Differences instructions +INST(arm_USAD8, "USAD8", 
"cccc01111000dddd1111mmmm0001nnnn") // v6 +INST(arm_USADA8, "USADA8", "cccc01111000ddddaaaammmm0001nnnn") // v6 + +// Packing instructions +INST(arm_PKHBT, "PKHBT", "cccc01101000nnnnddddvvvvv001mmmm") // v6K +INST(arm_PKHTB, "PKHTB", "cccc01101000nnnnddddvvvvv101mmmm") // v6K + +// Reversal instructions +INST(arm_RBIT, "RBIT", "cccc011011111111dddd11110011mmmm") // v6T2 +INST(arm_REV, "REV", "cccc011010111111dddd11110011mmmm") // v6 +INST(arm_REV16, "REV16", "cccc011010111111dddd11111011mmmm") // v6 +INST(arm_REVSH, "REVSH", "cccc011011111111dddd11111011mmmm") // v6 + +// Saturation instructions +INST(arm_SSAT, "SSAT", "cccc0110101vvvvvddddvvvvvr01nnnn") // v6 +INST(arm_SSAT16, "SSAT16", "cccc01101010vvvvdddd11110011nnnn") // v6 +INST(arm_USAT, "USAT", "cccc0110111vvvvvddddvvvvvr01nnnn") // v6 +INST(arm_USAT16, "USAT16", "cccc01101110vvvvdddd11110011nnnn") // v6 + +// Divide instructions +INST(arm_SDIV, "SDIV", "cccc01110001dddd1111mmmm0001nnnn") // v7a +INST(arm_UDIV, "UDIV", "cccc01110011dddd1111mmmm0001nnnn") // v7a + +// Multiply (Normal) instructions +INST(arm_MLA, "MLA", "cccc0000001Sddddaaaammmm1001nnnn") // v2 +INST(arm_MLS, "MLS", "cccc00000110ddddaaaammmm1001nnnn") // v6T2 +INST(arm_MUL, "MUL", "cccc0000000Sdddd0000mmmm1001nnnn") // v2 + +// Multiply (Long) instructions +INST(arm_SMLAL, "SMLAL", "cccc0000111Sddddaaaammmm1001nnnn") // v3M +INST(arm_SMULL, "SMULL", "cccc0000110Sddddaaaammmm1001nnnn") // v3M +INST(arm_UMAAL, "UMAAL", "cccc00000100ddddaaaammmm1001nnnn") // v6 +INST(arm_UMLAL, "UMLAL", "cccc0000101Sddddaaaammmm1001nnnn") // v3M +INST(arm_UMULL, "UMULL", "cccc0000100Sddddaaaammmm1001nnnn") // v3M + +// Multiply (Halfword) instructions +INST(arm_SMLALxy, "SMLALXY", "cccc00010100ddddaaaammmm1xy0nnnn") // v5xP +INST(arm_SMLAxy, "SMLAXY", "cccc00010000ddddaaaammmm1xy0nnnn") // v5xP +INST(arm_SMULxy, "SMULXY", "cccc00010110dddd0000mmmm1xy0nnnn") // v5xP + +// Multiply (Word by Halfword) instructions +INST(arm_SMLAWy, "SMLAWY", 
"cccc00010010ddddaaaammmm1y00nnnn") // v5xP +INST(arm_SMULWy, "SMULWY", "cccc00010010dddd0000mmmm1y10nnnn") // v5xP + +// Multiply (Most Significant Word) instructions +INST(arm_SMMUL, "SMMUL", "cccc01110101dddd1111mmmm00R1nnnn") // v6 +INST(arm_SMMLA, "SMMLA", "cccc01110101ddddaaaammmm00R1nnnn") // v6 +INST(arm_SMMLS, "SMMLS", "cccc01110101ddddaaaammmm11R1nnnn") // v6 + +// Multiply (Dual) instructions +INST(arm_SMLAD, "SMLAD", "cccc01110000ddddaaaammmm00M1nnnn") // v6 +INST(arm_SMLALD, "SMLALD", "cccc01110100ddddaaaammmm00M1nnnn") // v6 +INST(arm_SMLSD, "SMLSD", "cccc01110000ddddaaaammmm01M1nnnn") // v6 +INST(arm_SMLSLD, "SMLSLD", "cccc01110100ddddaaaammmm01M1nnnn") // v6 +INST(arm_SMUAD, "SMUAD", "cccc01110000dddd1111mmmm00M1nnnn") // v6 +INST(arm_SMUSD, "SMUSD", "cccc01110000dddd1111mmmm01M1nnnn") // v6 + +// Parallel Add/Subtract (Modulo) instructions +INST(arm_SADD8, "SADD8", "cccc01100001nnnndddd11111001mmmm") // v6 +INST(arm_SADD16, "SADD16", "cccc01100001nnnndddd11110001mmmm") // v6 +INST(arm_SASX, "SASX", "cccc01100001nnnndddd11110011mmmm") // v6 +INST(arm_SSAX, "SSAX", "cccc01100001nnnndddd11110101mmmm") // v6 +INST(arm_SSUB8, "SSUB8", "cccc01100001nnnndddd11111111mmmm") // v6 +INST(arm_SSUB16, "SSUB16", "cccc01100001nnnndddd11110111mmmm") // v6 +INST(arm_UADD8, "UADD8", "cccc01100101nnnndddd11111001mmmm") // v6 +INST(arm_UADD16, "UADD16", "cccc01100101nnnndddd11110001mmmm") // v6 +INST(arm_UASX, "UASX", "cccc01100101nnnndddd11110011mmmm") // v6 +INST(arm_USAX, "USAX", "cccc01100101nnnndddd11110101mmmm") // v6 +INST(arm_USUB8, "USUB8", "cccc01100101nnnndddd11111111mmmm") // v6 +INST(arm_USUB16, "USUB16", "cccc01100101nnnndddd11110111mmmm") // v6 + +// Parallel Add/Subtract (Saturating) instructions +INST(arm_QADD8, "QADD8", "cccc01100010nnnndddd11111001mmmm") // v6 +INST(arm_QADD16, "QADD16", "cccc01100010nnnndddd11110001mmmm") // v6 +INST(arm_QASX, "QASX", "cccc01100010nnnndddd11110011mmmm") // v6 +INST(arm_QSAX, "QSAX", 
"cccc01100010nnnndddd11110101mmmm") // v6 +INST(arm_QSUB8, "QSUB8", "cccc01100010nnnndddd11111111mmmm") // v6 +INST(arm_QSUB16, "QSUB16", "cccc01100010nnnndddd11110111mmmm") // v6 +INST(arm_UQADD8, "UQADD8", "cccc01100110nnnndddd11111001mmmm") // v6 +INST(arm_UQADD16, "UQADD16", "cccc01100110nnnndddd11110001mmmm") // v6 +INST(arm_UQASX, "UQASX", "cccc01100110nnnndddd11110011mmmm") // v6 +INST(arm_UQSAX, "UQSAX", "cccc01100110nnnndddd11110101mmmm") // v6 +INST(arm_UQSUB8, "UQSUB8", "cccc01100110nnnndddd11111111mmmm") // v6 +INST(arm_UQSUB16, "UQSUB16", "cccc01100110nnnndddd11110111mmmm") // v6 + +// Parallel Add/Subtract (Halving) instructions +INST(arm_SHADD8, "SHADD8", "cccc01100011nnnndddd11111001mmmm") // v6 +INST(arm_SHADD16, "SHADD16", "cccc01100011nnnndddd11110001mmmm") // v6 +INST(arm_SHASX, "SHASX", "cccc01100011nnnndddd11110011mmmm") // v6 +INST(arm_SHSAX, "SHSAX", "cccc01100011nnnndddd11110101mmmm") // v6 +INST(arm_SHSUB8, "SHSUB8", "cccc01100011nnnndddd11111111mmmm") // v6 +INST(arm_SHSUB16, "SHSUB16", "cccc01100011nnnndddd11110111mmmm") // v6 +INST(arm_UHADD8, "UHADD8", "cccc01100111nnnndddd11111001mmmm") // v6 +INST(arm_UHADD16, "UHADD16", "cccc01100111nnnndddd11110001mmmm") // v6 +INST(arm_UHASX, "UHASX", "cccc01100111nnnndddd11110011mmmm") // v6 +INST(arm_UHSAX, "UHSAX", "cccc01100111nnnndddd11110101mmmm") // v6 +INST(arm_UHSUB8, "UHSUB8", "cccc01100111nnnndddd11111111mmmm") // v6 +INST(arm_UHSUB16, "UHSUB16", "cccc01100111nnnndddd11110111mmmm") // v6 + +// Saturated Add/Subtract instructions +INST(arm_QADD, "QADD", "cccc00010000nnnndddd00000101mmmm") // v5xP +INST(arm_QSUB, "QSUB", "cccc00010010nnnndddd00000101mmmm") // v5xP +INST(arm_QDADD, "QDADD", "cccc00010100nnnndddd00000101mmmm") // v5xP +INST(arm_QDSUB, "QDSUB", "cccc00010110nnnndddd00000101mmmm") // v5xP + +// Status Register Access instructions +INST(arm_CPS, "CPS", "111100010000---00000000---0-----") // v6 +INST(arm_SETEND, "SETEND", "1111000100000001000000e000000000") // v6 +INST(arm_MRS, 
"MRS", "cccc000100001111dddd000000000000") // v3 +INST(arm_MSR_imm, "MSR (imm)", "cccc00110010mmmm1111rrrrvvvvvvvv") // v3 +INST(arm_MSR_reg, "MSR (reg)", "cccc00010010mmmm111100000000nnnn") // v3 +INST(arm_RFE, "RFE", "1111100--0-1----0000101000000000") // v6 +INST(arm_SRS, "SRS", "1111100--1-0110100000101000-----") // v6 +#undef INST + }; + // If a matcher has more bits in its mask it is more specific, so it should come first. + std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + return mcl::bit::count_ones(matcher1.second) > mcl::bit::count_ones(matcher2.second); + }); + for (auto const& e : list) + printf("%s\n", e.inst_final); + } else if (which == "-asimd") { + std::vector table = { +#define INST(fn, name, bitstring) { name, GetMaskAndExpect(bitstring), "INST(" #fn ", " #name ", " #bitstring ")" }, +// Three registers of the same length +INST(asimd_VHADD, "VHADD", "1111001U0Dzznnnndddd0000NQM0mmmm") // ASIMD +INST(asimd_VQADD, "VQADD", "1111001U0Dzznnnndddd0000NQM1mmmm") // ASIMD +INST(asimd_VRHADD, "VRHADD", "1111001U0Dzznnnndddd0001NQM0mmmm") // ASIMD +INST(asimd_VAND_reg, "VAND (register)", "111100100D00nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VBIC_reg, "VBIC (register)", "111100100D01nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VORR_reg, "VORR (register)", "111100100D10nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VORN_reg, "VORN (register)", "111100100D11nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VEOR_reg, "VEOR (register)", "111100110D00nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VBSL, "VBSL", "111100110D01nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VBIT, "VBIT", "111100110D10nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VBIF, "VBIF", "111100110D11nnnndddd0001NQM1mmmm") // ASIMD +INST(asimd_VHSUB, "VHSUB", "1111001U0Dzznnnndddd0010NQM0mmmm") // ASIMD +INST(asimd_VQSUB, "VQSUB", "1111001U0Dzznnnndddd0010NQM1mmmm") // ASIMD +INST(asimd_VCGT_reg, "VCGT (register)", "1111001U0Dzznnnndddd0011NQM0mmmm") // ASIMD 
+INST(asimd_VCGE_reg, "VCGE (register)", "1111001U0Dzznnnndddd0011NQM1mmmm") // ASIMD +INST(asimd_VSHL_reg, "VSHL (register)", "1111001U0Dzznnnndddd0100NQM0mmmm") // ASIMD +INST(asimd_VQSHL_reg, "VQSHL (register)", "1111001U0Dzznnnndddd0100NQM1mmmm") // ASIMD +INST(asimd_VRSHL, "VRSHL", "1111001U0Dzznnnndddd0101NQM0mmmm") // ASIMD +//INST(asimd_VQRSHL, "VQRSHL", "1111001U0-CC--------0101---1----") // ASIMD +INST(asimd_VMAX, "VMAX/VMIN (integer)", "1111001U0Dzznnnnmmmm0110NQMommmm") // ASIMD +INST(asimd_VABD, "VABD", "1111001U0Dzznnnndddd0111NQM0mmmm") // ASIMD +INST(asimd_VABA, "VABA", "1111001U0Dzznnnndddd0111NQM1mmmm") // ASIMD +INST(asimd_VADD_int, "VADD (integer)", "111100100Dzznnnndddd1000NQM0mmmm") // ASIMD +INST(asimd_VSUB_int, "VSUB (integer)", "111100110Dzznnnndddd1000NQM0mmmm") // ASIMD +INST(asimd_VTST, "VTST", "111100100Dzznnnndddd1000NQM1mmmm") // ASIMD +INST(asimd_VCEQ_reg, "VCEG (register)", "111100110Dzznnnndddd1000NQM1mmmm") // ASIMD +INST(asimd_VMLA, "VMLA/VMLS", "1111001o0Dzznnnndddd1001NQM0mmmm") // ASIMD +INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd1001NQM1mmmm") // ASIMD +INST(asimd_VPMAX_int, "VPMAX/VPMIN (integer)", "1111001U0Dzznnnndddd1010NQMommmm") // ASIMD +INST(v8_VMAXNM, "VMAXNM", "111100110D0znnnndddd1111NQM1mmmm") // v8 +INST(v8_VMINNM, "VMINNM", "111100110D1znnnndddd1111NQM1mmmm") // v8 +INST(asimd_VQDMULH, "VQDMULH", "111100100Dzznnnndddd1011NQM0mmmm") // ASIMD +INST(asimd_VQRDMULH, "VQRDMULH", "111100110Dzznnnndddd1011NQM0mmmm") // ASIMD +INST(asimd_VPADD, "VPADD", "111100100Dzznnnndddd1011NQM1mmmm") // ASIMD +INST(asimd_VFMA, "VFMA", "111100100D0znnnndddd1100NQM1mmmm") // ASIMD +INST(asimd_VFMS, "VFMS", "111100100D1znnnndddd1100NQM1mmmm") // ASIMD +INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD +INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD +INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") // ASIMD 
+INST(asimd_VABD_float, "VABD (floating-point)", "111100110D1znnnndddd1101NQM0mmmm") // ASIMD +INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100D0znnnndddd1101NQM1mmmm") // ASIMD +INST(asimd_VMLS_float, "VMLS (floating-point)", "111100100D1znnnndddd1101NQM1mmmm") // ASIMD +INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD +INST(asimd_VCEQ_reg_float, "VCEQ (register)", "111100100D0znnnndddd1110NQM0mmmm") // ASIMD +INST(asimd_VCGE_reg_float, "VCGE (register)", "111100110D0znnnndddd1110NQM0mmmm") // ASIMD +INST(asimd_VCGT_reg_float, "VCGT (register)", "111100110D1znnnndddd1110NQM0mmmm") // ASIMD +INST(asimd_VACGE, "VACGE", "111100110Doznnnndddd1110NQM1mmmm") // ASIMD +INST(asimd_VMAX_float, "VMAX (floating-point)", "111100100D0znnnndddd1111NQM0mmmm") // ASIMD +INST(asimd_VMIN_float, "VMIN (floating-point)", "111100100D1znnnndddd1111NQM0mmmm") // ASIMD +INST(asimd_VPMAX_float, "VPMAX (floating-point)", "111100110D0znnnndddd1111NQM0mmmm") // ASIMD +INST(asimd_VPMIN_float, "VPMIN (floating-point)", "111100110D1znnnndddd1111NQM0mmmm") // ASIMD +INST(asimd_VRECPS, "VRECPS", "111100100D0znnnndddd1111NQM1mmmm") // ASIMD +INST(asimd_VRSQRTS, "VRSQRTS", "111100100D1znnnndddd1111NQM1mmmm") // ASIMD +INST(v8_SHA256H, "SHA256H", "111100110D00nnnndddd1100NQM0mmmm") // v8 +INST(v8_SHA256H2, "SHA256H2", "111100110D01nnnndddd1100NQM0mmmm") // v8 +INST(v8_SHA256SU1, "SHA256SU1", "111100110D10nnnndddd1100NQM0mmmm") // v8 + +// Three registers of different lengths +INST(asimd_VADDL, "VADDL/VADDW", "1111001U1Dzznnnndddd000oN0M0mmmm") // ASIMD +INST(asimd_VSUBL, "VSUBL/VSUBW", "1111001U1Dzznnnndddd001oN0M0mmmm") // ASIMD +//INST(asimd_VADDHN, "VADDHN", "111100101-----------0100-0-0----") // ASIMD +//INST(asimd_VRADDHN, "VRADDHN", "111100111-----------0100-0-0----") // ASIMD +INST(asimd_VABAL, "VABAL", "1111001U1Dzznnnndddd0101N0M0mmmm") // ASIMD +//INST(asimd_VSUBHN, "VSUBHN", "111100101-----------0110-0-0----") // ASIMD 
+//INST(asimd_VRSUBHN, "VRSUBHN", "111100111-----------0110-0-0----") // ASIMD +INST(asimd_VABDL, "VABDL", "1111001U1Dzznnnndddd0111N0M0mmmm") // ASIMD +INST(asimd_VMLAL, "VMLAL/VMLSL", "1111001U1Dzznnnndddd10o0N0M0mmmm") // ASIMD +//INST(asimd_VQDMLAL, "VQDMLAL", "111100101-----------10-1-0-0----") // ASIMD +INST(asimd_VMULL, "VMULL", "1111001U1Dzznnnndddd11P0N0M0mmmm") // ASIMD +//INST(asimd_VQDMULL, "VQDMULL", "111100101-----------1101-0-0----") // ASIMD + +// Two registers and a scalar +INST(asimd_VMLA_scalar, "VMLA (scalar)", "1111001Q1Dzznnnndddd0o0FN1M0mmmm") // ASIMD +INST(asimd_VMLAL_scalar, "VMLAL (scalar)", "1111001U1dzznnnndddd0o10N1M0mmmm") // ASIMD +//INST(asimd_VQDMLAL_scalar, "VQDMLAL/VQDMLSL (scalar)", "111100101-BB--------0x11-1-0----") // ASIMD +INST(asimd_VMUL_scalar, "VMUL (scalar)", "1111001Q1Dzznnnndddd100FN1M0mmmm") // ASIMD +INST(asimd_VMULL_scalar, "VMULL (scalar)", "1111001U1Dzznnnndddd1010N1M0mmmm") // ASIMD +INST(asimd_VQDMULL_scalar, "VQDMULL (scalar)", "111100101Dzznnnndddd1011N1M0mmmm") // ASIMD +INST(asimd_VQDMULH_scalar, "VQDMULH (scalar)", "1111001Q1Dzznnnndddd1100N1M0mmmm") // ASIMD +INST(asimd_VQRDMULH_scalar, "VQRDMULH (scalar)", "1111001Q1Dzznnnndddd1101N1M0mmmm") // ASIMD + +// Two registers and a shift amount +INST(asimd_SHR, "SHR", "1111001U1Diiiiiidddd0000LQM1mmmm") // ASIMD +INST(asimd_SRA, "SRA", "1111001U1Diiiiiidddd0001LQM1mmmm") // ASIMD +INST(asimd_VRSHR, "VRSHR", "1111001U1Diiiiiidddd0010LQM1mmmm") // ASIMD +INST(asimd_VRSRA, "VRSRA", "1111001U1Diiiiiidddd0011LQM1mmmm") // ASIMD +INST(asimd_VSRI, "VSRI", "111100111Diiiiiidddd0100LQM1mmmm") // ASIMD +INST(asimd_VSHL, "VSHL", "111100101Diiiiiidddd0101LQM1mmmm") // ASIMD +INST(asimd_VSLI, "VSLI", "111100111Diiiiiidddd0101LQM1mmmm") // ASIMD +INST(asimd_VQSHL, "VQSHL" , "1111001U1Diiiiiidddd011oLQM1mmmm") // ASIMD +INST(asimd_VSHRN, "VSHRN", "111100101Diiiiiidddd100000M1mmmm") // ASIMD +INST(asimd_VRSHRN, "VRSHRN", "111100101Diiiiiidddd100001M1mmmm") // ASIMD 
+INST(asimd_VQSHRUN, "VQSHRUN", "111100111Diiiiiidddd100000M1mmmm") // ASIMD +INST(asimd_VQRSHRUN, "VQRSHRUN", "111100111Diiiiiidddd100001M1mmmm") // ASIMD +INST(asimd_VQSHRN, "VQSHRN", "1111001U1Diiiiiidddd100100M1mmmm") // ASIMD +INST(asimd_VQRSHRN, "VQRSHRN", "1111001U1Diiiiiidddd100101M1mmmm") // ASIMD +INST(asimd_VSHLL, "VSHLL", "1111001U1Diiiiiidddd101000M1mmmm") // ASIMD +INST(asimd_VCVT_fixed, "VCVT (fixed-point)", "1111001U1Diiiiiidddd111o0QM1mmmm") // ASIMD + +// Two registers, miscellaneous +INST(asimd_VREV, "VREV{16,32,64}", "111100111D11zz00dddd000ooQM0mmmm") // ASIMD +INST(asimd_VPADDL, "VPADDL", "111100111D11zz00dddd0010oQM0mmmm") // ASIMD +INST(asimd_VCLS, "VCLS", "111100111D11zz00dddd01000QM0mmmm") // ASIMD +INST(asimd_VCLZ, "VCLZ", "111100111D11zz00dddd01001QM0mmmm") // ASIMD +INST(asimd_VCNT, "VCNT", "111100111D11zz00dddd01010QM0mmmm") // ASIMD +INST(asimd_VMVN_reg, "VMVN_reg", "111100111D11zz00dddd01011QM0mmmm") // ASIMD +INST(asimd_VPADAL, "VPADAL", "111100111D11zz00dddd0110oQM0mmmm") // ASIMD +INST(asimd_VQABS, "VQABS", "111100111D11zz00dddd01110QM0mmmm") // ASIMD +INST(asimd_VQNEG, "VQNEG", "111100111D11zz00dddd01111QM0mmmm") // ASIMD +INST(asimd_VCGT_zero, "VCGT (zero)", "111100111D11zz01dddd0F000QM0mmmm") // ASIMD +INST(asimd_VCGE_zero, "VCGE (zero)", "111100111D11zz01dddd0F001QM0mmmm") // ASIMD +INST(asimd_VCEQ_zero, "VCEQ (zero)", "111100111D11zz01dddd0F010QM0mmmm") // ASIMD +INST(asimd_VCLE_zero, "VCLE (zero)", "111100111D11zz01dddd0F011QM0mmmm") // ASIMD +INST(asimd_VCLT_zero, "VCLT (zero)", "111100111D11zz01dddd0F100QM0mmmm") // ASIMD +INST(arm_UDF, "UNALLOCATED", "111100111-11--01----01101--0----") // v8 +INST(asimd_VABS, "VABS", "111100111D11zz01dddd0F110QM0mmmm") // ASIMD +INST(asimd_VNEG, "VNEG", "111100111D11zz01dddd0F111QM0mmmm") // ASIMD +INST(asimd_VSWP, "VSWP", "111100111D110010dddd00000QM0mmmm") // ASIMD +INST(arm_UDF, "UNALLOCATED", "111100111-11--10----00000--0----") // ASIMD +INST(asimd_VTRN, "VTRN", 
"111100111D11zz10dddd00001QM0mmmm") // ASIMD +INST(asimd_VUZP, "VUZP", "111100111D11zz10dddd00010QM0mmmm") // ASIMD +INST(asimd_VZIP, "VZIP", "111100111D11zz10dddd00011QM0mmmm") // ASIMD +INST(asimd_VMOVN, "VMOVN", "111100111D11zz10dddd001000M0mmmm") // ASIMD +INST(asimd_VQMOVUN, "VQMOVUN", "111100111D11zz10dddd001001M0mmmm") // ASIMD +INST(asimd_VQMOVN, "VQMOVN", "111100111D11zz10dddd00101oM0mmmm") // ASIMD +INST(asimd_VSHLL_max, "VSHLL_max", "111100111D11zz10dddd001100M0mmmm") // ASIMD +INST(v8_VRINTN, "VRINTN", "111100111D11zz10dddd01000QM0mmmm") // v8 +INST(v8_VRINTX, "VRINTX", "111100111D11zz10dddd01001QM0mmmm") // v8 +INST(v8_VRINTA, "VRINTA", "111100111D11zz10dddd01010QM0mmmm") // v8 +INST(v8_VRINTZ, "VRINTZ", "111100111D11zz10dddd01011QM0mmmm") // v8 +INST(v8_VRINTM, "VRINTM", "111100111D11zz10dddd01101QM0mmmm") // v8 +INST(v8_VRINTP, "VRINTP", "111100111D11zz10dddd01111QM0mmmm") // v8 +INST(asimd_VCVT_half, "VCVT (half-precision)", "111100111D11zz10dddd011o00M0mmmm") // ASIMD +INST(arm_UDF, "UNALLOCATED", "111100111-11--10----011-01-0----") // ASIMD +INST(v8_VCVTA, "VCVTA", "111100111D11zz11dddd0000oQM0mmmm") // v8 +INST(v8_VCVTN, "VCVTN", "111100111D11zz11dddd0001oQM0mmmm") // v8 +INST(v8_VCVTP, "VCVTP", "111100111D11zz11dddd0010oQM0mmmm") // v8 +INST(v8_VCVTM, "VCVTM", "111100111D11zz11dddd0011oQM0mmmm") // v8 +INST(asimd_VRECPE, "VRECPE", "111100111D11zz11dddd010F0QM0mmmm") // ASIMD +INST(asimd_VRSQRTE, "VRSQRTE", "111100111D11zz11dddd010F1QM0mmmm") // ASIMD +INST(asimd_VCVT_integer, "VCVT (integer)", "111100111D11zz11dddd011oUQM0mmmm") // ASIMD + +// Two registers, cryptography +INST(v8_AESE, "AESE", "111100111D11zz00dddd001100M0mmmm") // v8 +INST(v8_AESD, "AESD", "111100111D11zz00dddd001101M0mmmm") // v8 +INST(v8_AESMC, "AESMC", "111100111D11zz00dddd001110M0mmmm") // v8 +INST(v8_AESIMC, "AESIMC", "111100111D11zz00dddd001111M0mmmm") // v8 +INST(arm_UDF, "UNALLOCATED", "111100111-11--01----001010-0----") // v8 +INST(arm_UDF, "UNALLOCATED (SHA1H)", 
"111100111-11--01----001011-0----") // v8 +INST(arm_UDF, "UNALLOCATED (SHA1SU1)", "111100111-11--10----001110-0----") // v8 +INST(v8_SHA256SU0, "SHA256SU0", "111100111D11zz10dddd001111M0mmmm") // v8 + +// One register and modified immediate +INST(asimd_VMOV_imm, "VBIC, VMOV, VMVN, VORR (immediate)", "1111001a1D000bcdVVVVmmmm0Qo1efgh") // ASIMD + +// Miscellaneous +INST(asimd_VEXT, "VEXT", "111100101D11nnnnddddiiiiNQM0mmmm") // ASIMD +INST(asimd_VTBL, "VTBL", "111100111D11nnnndddd10zzN0M0mmmm") // ASIMD +INST(asimd_VTBX, "VTBX", "111100111D11nnnndddd10zzN1M0mmmm") // ASIMD +INST(asimd_VDUP_scalar, "VDUP (scalar)", "111100111D11iiiidddd11000QM0mmmm") // ASIMD +INST(arm_UDF, "UNALLOCATED", "111100111-11--------11-----0----") // ASIMD + +// Advanced SIMD load/store structures +INST(v8_VST_multiple, "VST{1-4} (multiple)", "111101000D00nnnnddddxxxxzzaammmm") // v8 +INST(v8_VLD_multiple, "VLD{1-4} (multiple)", "111101000D10nnnnddddxxxxzzaammmm") // v8 +INST(arm_UDF, "UNALLOCATED", "111101000--0--------1011--------") // v8 +INST(arm_UDF, "UNALLOCATED", "111101000--0--------11----------") // v8 +INST(arm_UDF, "UNALLOCATED", "111101001-00--------11----------") // v8 +INST(v8_VLD_all_lanes, "VLD{1-4} (all lanes)", "111101001D10nnnndddd11nnzzTammmm") // v8 +INST(v8_VST_single, "VST{1-4} (single)", "111101001D00nnnnddddzzNNaaaammmm") // v8 +INST(v8_VLD_single, "VLD{1-4} (single)", "111101001D10nnnnddddzzNNaaaammmm") // v8 +#undef INST + }; + // Exceptions to the rule of thumb. 
+ const std::set comes_first{ + "VBIC, VMOV, VMVN, VORR (immediate)", + "VEXT", + "VTBL", + "VTBX", + "VDUP (scalar)", + }; + const std::set comes_last{ + "VMLA (scalar)", + "VMLAL (scalar)", + "VQDMLAL/VQDMLSL (scalar)", + "VMUL (scalar)", + "VMULL (scalar)", + "VQDMULL (scalar)", + "VQDMULH (scalar)", + "VQRDMULH (scalar)", + }; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; + }); + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; + }); + // If a matcher has more bits in its mask it is more specific, so it should come first. + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second) > mcl::bit::count_ones(b.second); + }); + for (auto const& e : table) + printf("%s\n", e.inst_final); + } else if (which == "-a64") { + std::vector list = { +#define INST(fn, name, bitstring) { name, GetMaskAndExpect(bitstring), "INST(" #fn ", " #name ", " #bitstring ")" }, +// Data processing - Immediate - PC relative addressing +INST(ADR, "ADR", "0ii10000iiiiiiiiiiiiiiiiiiiddddd") +INST(ADRP, "ADRP", "1ii10000iiiiiiiiiiiiiiiiiiiddddd") + +// Data processing - Immediate - Add/Sub (with tags) +//INST(ADDG, "ADDG", "1001000110iiiiii00IIIInnnnnddddd") // ARMv8.5 +//INST(SUBG, "SUBG", "1101000110iiiiii00IIIInnnnnddddd") // ARMv8.5 + +// Data processing - Immediate - Add/Sub +INST(ADD_imm, "ADD (immediate)", "z0010001ssiiiiiiiiiiiinnnnnddddd") +INST(ADDS_imm, "ADDS (immediate)", "z0110001ssiiiiiiiiiiiinnnnnddddd") +INST(SUB_imm, "SUB (immediate)", "z1010001ssiiiiiiiiiiiinnnnnddddd") +INST(SUBS_imm, "SUBS (immediate)", "z1110001ssiiiiiiiiiiiinnnnnddddd") + +// Data processing - Immediate - Logical +INST(AND_imm, "AND (immediate)", "z00100100Nrrrrrrssssssnnnnnddddd") +INST(ORR_imm, "ORR (immediate)", "z01100100Nrrrrrrssssssnnnnnddddd") +INST(EOR_imm, "EOR 
(immediate)", "z10100100Nrrrrrrssssssnnnnnddddd") +INST(ANDS_imm, "ANDS (immediate)", "z11100100Nrrrrrrssssssnnnnnddddd") + +// Data processing - Immediate - Move Wide +INST(MOVN, "MOVN", "z00100101ssiiiiiiiiiiiiiiiiddddd") +INST(MOVZ, "MOVZ", "z10100101ssiiiiiiiiiiiiiiiiddddd") +INST(MOVK, "MOVK", "z11100101ssiiiiiiiiiiiiiiiiddddd") + +// Data processing - Immediate - Bitfield +INST(SBFM, "SBFM", "z00100110Nrrrrrrssssssnnnnnddddd") +INST(BFM, "BFM", "z01100110Nrrrrrrssssssnnnnnddddd") +INST(UBFM, "UBFM", "z10100110Nrrrrrrssssssnnnnnddddd") +INST(ASR_1, "ASR (immediate, 32-bit)", "00010011000rrrrr011111nnnnnddddd") +INST(ASR_2, "ASR (immediate, 64-bit)", "1001001101rrrrrr111111nnnnnddddd") +INST(SXTB_1, "SXTB (32-bit)", "0001001100000000000111nnnnnddddd") +INST(SXTB_2, "SXTB (64-bit)", "1001001101000000000111nnnnnddddd") +INST(SXTH_1, "SXTH (32-bit)", "0001001100000000001111nnnnnddddd") +INST(SXTH_2, "SXTH (64-bit)", "1001001101000000001111nnnnnddddd") +INST(SXTW, "SXTW", "1001001101000000011111nnnnnddddd") + +// Data processing - Immediate - Extract +INST(EXTR, "EXTR", "z00100111N0mmmmmssssssnnnnnddddd") + +// Conditional branch +INST(B_cond, "B.cond", "01010100iiiiiiiiiiiiiiiiiii0cccc") + +// Exception generation +INST(SVC, "SVC", "11010100000iiiiiiiiiiiiiiii00001") +//INST(HVC, "HVC", "11010100000iiiiiiiiiiiiiiii00010") +//INST(SMC, "SMC", "11010100000iiiiiiiiiiiiiiii00011") +INST(BRK, "BRK", "11010100001iiiiiiiiiiiiiiii00000") +//INST(HLT, "HLT", "11010100010iiiiiiiiiiiiiiii00000") +//INST(DCPS1, "DCPS1", "11010100101iiiiiiiiiiiiiiii00001") +//INST(DCPS2, "DCPS2", "11010100101iiiiiiiiiiiiiiii00010") +//INST(DCPS3, "DCPS3", "11010100101iiiiiiiiiiiiiiii00011") + +// System +//INST(MSR_imm, "MSR (immediate)", "1101010100000ooo0100MMMMooo11111") +INST(HINT, "HINT", "11010101000000110010MMMMooo11111") +INST(NOP, "NOP", "11010101000000110010000000011111") +INST(YIELD, "YIELD", "11010101000000110010000000111111") +INST(WFE, "WFE", "11010101000000110010000001011111") 
+INST(WFI, "WFI", "11010101000000110010000001111111") +INST(SEV, "SEV", "11010101000000110010000010011111") +INST(SEVL, "SEVL", "11010101000000110010000010111111") +//INST(DGH, "DGH", "11010101000000110010000011011111") // v8.6 +//INST(WFET, "WFET", "110101010000001100010000000ddddd") // v8.7 +//INST(WFIT, "WFIT", "110101010000001100010000001ddddd") // v8.7 +//INST(XPAC_1, "XPACD, XPACI, XPACLRI", "110110101100000101000D11111ddddd") +//INST(XPAC_2, "XPACD, XPACI, XPACLRI", "11010101000000110010000011111111") +//INST(PACIA_1, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "110110101100000100Z000nnnnnddddd") +//INST(PACIA_2, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "1101010100000011001000-100-11111") +//INST(PACIB_1, "PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB", "110110101100000100Z001nnnnnddddd") +//INST(PACIB_2, "PACIB, PACIB1716, PACIBSP, PACIBZ, PACIZB", "1101010100000011001000-101-11111") +//INST(AUTIA_1, "AUTIA, AUTIA1716, AUTIASP, AUTIAZ, AUTIZA", "110110101100000100Z100nnnnnddddd") +//INST(AUTIA_2, "AUTIA, AUTIA1716, AUTIASP, AUTIAZ, AUTIZA", "1101010100000011001000-110-11111") +//INST(AUTIB_1, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "110110101100000100Z101nnnnnddddd") +//INST(AUTIB_2, "AUTIB, AUTIB1716, AUTIBSP, AUTIBZ, AUTIZB", "1101010100000011001000-111-11111") +//INST(BTI, "BTI", "110101010000001100100100ii011111") // ARMv8.5 +//INST(ESB, "ESB", "11010101000000110010001000011111") +//INST(PSB, "PSB CSYNC", "11010101000000110010001000111111") +//INST(TSB, "TSB CSYNC", "11010101000000110010001001011111") // ARMv8.5 +//INST(CSDB, "CSDB", "11010101000000110010001010011111") +INST(CLREX, "CLREX", "11010101000000110011MMMM01011111") +INST(DSB, "DSB", "11010101000000110011MMMM10011111") +//INST(SSBB, "SSBB", "11010101000000110011000010011111") +//INST(PSSBB, "PSSBB", "11010101000000110011010010011111") +INST(DMB, "DMB", "11010101000000110011MMMM10111111") +INST(ISB, "ISB", "11010101000000110011MMMM11011111") +//INST(SB, "SB", 
"11010101000000110011000011111111") +//INST(SYS, "SYS", "1101010100001oooNNNNMMMMooottttt") +INST(MSR_reg, "MSR (register)", "110101010001poooNNNNMMMMooottttt") +//INST(SYSL, "SYSL", "1101010100101oooNNNNMMMMooottttt") +INST(MRS, "MRS", "110101010011poooNNNNMMMMooottttt") + +// System - Flag manipulation instructions +INST(CFINV, "CFINV", "11010101000000000100000000011111") // ARMv8.4 +INST(RMIF, "RMIF", "10111010000iiiiii00001nnnnn0IIII") // ARMv8.4 +//INST(SETF8, "SETF8", "0011101000000000000010nnnnn01101") // ARMv8.4 +//INST(SETF16, "SETF16", "0011101000000000010010nnnnn01101") // ARMv8.4 + +// System - Flag format instructions +INST(XAFlag, "XAFlag", "11010101000000000100000000111111") // ARMv8.5 +INST(AXFlag, "AXFlag", "11010101000000000100000001011111") // ARMv8.5 + +// SYS: Data Cache +INST(DC_IVAC, "DC IVAC", "110101010000100001110110001ttttt") +INST(DC_ISW, "DC ISW", "110101010000100001110110010ttttt") +INST(DC_CSW, "DC CSW", "110101010000100001111010010ttttt") +INST(DC_CISW, "DC CISW", "110101010000100001111110010ttttt") +INST(DC_ZVA, "DC ZVA", "110101010000101101110100001ttttt") +INST(DC_CVAC, "DC CVAC", "110101010000101101111010001ttttt") +INST(DC_CVAU, "DC CVAU", "110101010000101101111011001ttttt") +INST(DC_CVAP, "DC CVAP", "110101010000101101111100001ttttt") +INST(DC_CIVAC, "DC CIVAC", "110101010000101101111110001ttttt") + +// SYS: Instruction Cache +INST(IC_IALLU, "IC IALLU", "11010101000010000111010100011111") +INST(IC_IALLUIS, "IC IALLUIS", "11010101000010000111000100011111") +INST(IC_IVAU, "IC IVAU", "110101010000101101110101001ttttt") + +// Unconditional branch (Register) +INST(BLR, "BLR", "1101011000111111000000nnnnn00000") +INST(BR, "BR", "1101011000011111000000nnnnn00000") +//INST(DRPS, "DRPS", "11010110101111110000001111100000") +//INST(ERET, "ERET", "11010110100111110000001111100000") +INST(RET, "RET", "1101011001011111000000nnnnn00000") +//INST(BLRA, "BLRAA, BLRAAZ, BLRAB, BLRABZ", "1101011Z0011111100001Mnnnnnmmmmm") // ARMv8.3 +//INST(BRA, 
"BRAA, BRAAZ, BRAB, BRABZ", "1101011Z0001111100001Mnnnnnmmmmm") // ARMv8.3 +//INST(ERETA, "ERETAA, ERETAB", "110101101001111100001M1111111111") // ARMv8.3 +//INST(RETA, "RETAA, RETAB", "110101100101111100001M1111111111") // ARMv8.3 + +// Unconditional branch (immediate) +INST(B_uncond, "B", "000101iiiiiiiiiiiiiiiiiiiiiiiiii") +INST(BL, "BL", "100101iiiiiiiiiiiiiiiiiiiiiiiiii") + +// Compare and branch (immediate) +INST(CBZ, "CBZ", "z0110100iiiiiiiiiiiiiiiiiiittttt") +INST(CBNZ, "CBNZ", "z0110101iiiiiiiiiiiiiiiiiiittttt") +INST(TBZ, "TBZ", "b0110110bbbbbiiiiiiiiiiiiiittttt") +INST(TBNZ, "TBNZ", "b0110111bbbbbiiiiiiiiiiiiiittttt") + +// Loads and stores - Advanced SIMD Load/Store multiple structures +INST(STx_mult_1, "STx (multiple structures)", "0Q00110000000000oooozznnnnnttttt") +INST(STx_mult_2, "STx (multiple structures)", "0Q001100100mmmmmoooozznnnnnttttt") +INST(LDx_mult_1, "LDx (multiple structures)", "0Q00110001000000oooozznnnnnttttt") +INST(LDx_mult_2, "LDx (multiple structures)", "0Q001100110mmmmmoooozznnnnnttttt") + +// Loads and stores - Advanced SIMD Load/Store single structures +INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000oo0Szznnnnnttttt") +INST(ST1_sngl_2, "ST1 (single structure)", "0Q001101100mmmmmoo0Szznnnnnttttt") +INST(ST3_sngl_1, "ST3 (single structure)", "0Q00110100000000oo1Szznnnnnttttt") +INST(ST3_sngl_2, "ST3 (single structure)", "0Q001101100mmmmmoo1Szznnnnnttttt") +INST(ST2_sngl_1, "ST2 (single structure)", "0Q00110100100000oo0Szznnnnnttttt") +INST(ST2_sngl_2, "ST2 (single structure)", "0Q001101101mmmmmoo0Szznnnnnttttt") +INST(ST4_sngl_1, "ST4 (single structure)", "0Q00110100100000oo1Szznnnnnttttt") +INST(ST4_sngl_2, "ST4 (single structure)", "0Q001101101mmmmmoo1Szznnnnnttttt") +INST(LD1_sngl_1, "LD1 (single structure)", "0Q00110101000000oo0Szznnnnnttttt") +INST(LD1_sngl_2, "LD1 (single structure)", "0Q001101110mmmmmoo0Szznnnnnttttt") +INST(LD3_sngl_1, "LD3 (single structure)", "0Q00110101000000oo1Szznnnnnttttt") 
+INST(LD3_sngl_2, "LD3 (single structure)", "0Q001101110mmmmmoo1Szznnnnnttttt") +INST(LD1R_1, "LD1R", "0Q001101010000001100zznnnnnttttt") +INST(LD1R_2, "LD1R", "0Q001101110mmmmm1100zznnnnnttttt") +INST(LD3R_1, "LD3R", "0Q001101010000001110zznnnnnttttt") +INST(LD3R_2, "LD3R", "0Q001101110mmmmm1110zznnnnnttttt") +INST(LD2_sngl_1, "LD2 (single structure)", "0Q00110101100000oo0Szznnnnnttttt") +INST(LD2_sngl_2, "LD2 (single structure)", "0Q001101111mmmmmoo0Szznnnnnttttt") +INST(LD4_sngl_1, "LD4 (single structure)", "0Q00110101100000oo1Szznnnnnttttt") +INST(LD4_sngl_2, "LD4 (single structure)", "0Q001101111mmmmmoo1Szznnnnnttttt") +INST(LD2R_1, "LD2R", "0Q001101011000001100zznnnnnttttt") +INST(LD2R_2, "LD2R", "0Q001101111mmmmm1100zznnnnnttttt") +INST(LD4R_1, "LD4R", "0Q001101011000001110zznnnnnttttt") +INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt") + +// Loads and stores - Load/Store Exclusive +INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt") +INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt") +INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt") +INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt") +INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt") +INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt") +INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt") +INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt") +INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt") +INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt") +INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt") +INST(LDAR, "LDARB, LDARH, LDAR", "zz00100011011111111111nnnnnttttt") +//INST(CASP, "CASP, CASPA, CASPAL, CASPL", "0z0010000L1sssssp11111nnnnnttttt") // ARMv8.1 +//INST(CASB, "CASB, CASAB, CASALB, CASLB", "000010001L1sssssp11111nnnnnttttt") // ARMv8.1 +//INST(CASH, "CASH, CASAH, CASALH, CASLH", "010010001L1sssssp11111nnnnnttttt") // ARMv8.1 +//INST(CAS, 
"CAS, CASA, CASAL, CASL", "1z0010001L1sssssp11111nnnnnttttt") // ARMv8.1 + +// Loads and stores - Load register (literal) +INST(LDR_lit_gen, "LDR (literal)", "0z011000iiiiiiiiiiiiiiiiiiittttt") +INST(LDRSW_lit, "LDRSW (literal)", "10011000iiiiiiiiiiiiiiiiiiittttt") +INST(PRFM_lit, "PRFM (literal)", "11011000iiiiiiiiiiiiiiiiiiittttt") +INST(LDR_lit_fpsimd, "LDR (literal, SIMD&FP)", "oo011100iiiiiiiiiiiiiiiiiiittttt") + +// Loads and stores - Load/Store no-allocate pair +INST(STNP_LDNP_gen, "STNP/LDNP", "o01010000Liiiiiiiuuuuunnnnnttttt") +INST(STNP_LDNP_fpsimd, "STNP/LDNP (SIMD&FP)", "oo1011000Liiiiiiiuuuuunnnnnttttt") + +// Loads and stores - Load/Store register pair +INST(STP_LDP_gen, "STP/LDP", "oo10100pwLiiiiiiiuuuuunnnnnttttt") +INST(UnallocatedEncoding, "", "--1010000-----------------------") +INST(STP_LDP_fpsimd, "STP/LDP (SIMD&FP)", "oo10110pwLiiiiiiiuuuuunnnnnttttt") +INST(UnallocatedEncoding, "", "--1011000-----------------------") + +// Loads and stores - Load/Store register (unscaled immediate) +INST(STURx_LDURx, "STURx/LDURx", "zz111000oo0iiiiiiiii00nnnnnttttt") +INST(UnallocatedEncoding, "", "111110001-0---------00----------") +INST(UnallocatedEncoding, "", "10111000110---------00----------") +INST(PRFM_imm, "PRFM (immediate)", "1111100110iiiiiiiiiiiinnnnnttttt") +INST(PRFM_unscaled_imm, "PRFM (unscaled offset)", "11111000100iiiiiiiii00nnnnnttttt") +INST(STUR_fpsimd, "STUR (SIMD&FP)", "zz111100o00iiiiiiiii00nnnnnttttt") +INST(LDUR_fpsimd, "LDUR (SIMD&FP)", "zz111100o10iiiiiiiii00nnnnnttttt") + +// Loads and stores - Load/Store register (immediate pre/post-indexed) +INST(STRx_LDRx_imm_1, "STRx/LDRx (immediate)", "zz111000oo0iiiiiiiiip1nnnnnttttt") +INST(STRx_LDRx_imm_2, "STRx/LDRx (immediate)", "zz111001ooiiiiiiiiiiiinnnnnttttt") +INST(UnallocatedEncoding, "", "111110001-0----------1----------") +INST(UnallocatedEncoding, "", "10111000110----------1----------") +INST(UnallocatedEncoding, "", "1111100111----------------------") +INST(UnallocatedEncoding, 
"", "1011100111----------------------") +INST(STR_imm_fpsimd_1, "STR (immediate, SIMD&FP)", "zz111100o00iiiiiiiiip1nnnnnttttt") +INST(STR_imm_fpsimd_2, "STR (immediate, SIMD&FP)", "zz111101o0iiiiiiiiiiiinnnnnttttt") +INST(LDR_imm_fpsimd_1, "LDR (immediate, SIMD&FP)", "zz111100o10iiiiiiiiip1nnnnnttttt") +INST(LDR_imm_fpsimd_2, "LDR (immediate, SIMD&FP)", "zz111101o1iiiiiiiiiiiinnnnnttttt") +//INST(STGP_1, "STGP (post-index)", "0110100010iiiiiiimmmmmnnnnnttttt") // ARMv8.5 +//INST(STGP_2, "STGP (pre-index)", "0110100110iiiiiiimmmmmnnnnnttttt") // ARMv8.5 +//INST(STGP_3, "STGP (signed-offset)", "0110100100iiiiiiimmmmmnnnnnttttt") // ARMv8.5 + +// Loads and stores - Load/Store register (unprivileged) +INST(STTRB, "STTRB", "00111000000iiiiiiiii10nnnnnttttt") +INST(LDTRB, "LDTRB", "00111000010iiiiiiiii10nnnnnttttt") +INST(LDTRSB, "LDTRSB", "00111000oo0iiiiiiiii10nnnnnttttt") +INST(STTRH, "STTRH", "01111000000iiiiiiiii10nnnnnttttt") +INST(LDTRH, "LDTRH", "01111000010iiiiiiiii10nnnnnttttt") +INST(LDTRSH, "LDTRSH", "01111000oo0iiiiiiiii10nnnnnttttt") +INST(STTR, "STTR", "zz111000000iiiiiiiii10nnnnnttttt") +INST(LDTR, "LDTR", "zz111000010iiiiiiiii10nnnnnttttt") +INST(LDTRSW, "LDTRSW", "10111000100iiiiiiiii10nnnnnttttt") + +// Loads and stores - Atomic memory options +//INST(LDADDB, "LDADDB, LDADDAB, LDADDALB, LDADDLB", "00111000AR1sssss000000nnnnnttttt") +//INST(LDCLRB, "LDCLRB, LDCLRAB, LDCLRALB, LDCLRLB", "00111000AR1sssss000100nnnnnttttt") +//INST(LDEORB, "LDEORB, LDEORAB, LDEORALB, LDEORLB", "00111000AR1sssss001000nnnnnttttt") +//INST(LDSETB, "LDSETB, LDSETAB, LDSETALB, LDSETLB", "00111000AR1sssss001100nnnnnttttt") +//INST(LDSMAXB, "LDSMAXB, LDSMAXAB, LDSMAXALB, LDSMAXLB", "00111000AR1sssss010000nnnnnttttt") +//INST(LDSMINB, "LDSMINB, LDSMINAB, LDSMINALB, LDSMINLB", "00111000AR1sssss010100nnnnnttttt") +//INST(LDUMAXB, "LDUMAXB, LDUMAXAB, LDUMAXALB, LDUMAXLB", "00111000AR1sssss011000nnnnnttttt") +//INST(LDUMINB, "LDUMINB, LDUMINAB, LDUMINALB, LDUMINLB", 
"00111000AR1sssss011100nnnnnttttt") +//INST(SWPB, "SWPB, SWPAB, SWPALB, SWPLB", "00111000AR1sssss100000nnnnnttttt") +//INST(LDAPRB, "LDAPRB", "0011100010111111110000nnnnnttttt") +//INST(LDADDH, "LDADDH, LDADDAH, LDADDALH, LDADDLH", "01111000AR1sssss000000nnnnnttttt") +//INST(LDCLRH, "LDCLRH, LDCLRAH, LDCLRALH, LDCLRLH", "01111000AR1sssss000100nnnnnttttt") +//INST(LDEORH, "LDEORH, LDEORAH, LDEORALH, LDEORLH", "01111000AR1sssss001000nnnnnttttt") +//INST(LDSETH, "LDSETH, LDSETAH, LDSETALH, LDSETLH", "01111000AR1sssss001100nnnnnttttt") +//INST(LDSMAXH, "LDSMAXH, LDSMAXAH, LDSMAXALH, LDSMAXLH", "01111000AR1sssss010000nnnnnttttt") +//INST(LDSMINH, "LDSMINH, LDSMINAH, LDSMINALH, LDSMINLH", "01111000AR1sssss010100nnnnnttttt") +//INST(LDUMAXH, "LDUMAXH, LDUMAXAH, LDUMAXALH, LDUMAXLH", "01111000AR1sssss011000nnnnnttttt") +//INST(LDUMINH, "LDUMINH, LDUMINAH, LDUMINALH, LDUMINLH", "01111000AR1sssss011100nnnnnttttt") +//INST(SWPH, "SWPH, SWPAH, SWPALH, SWPLH", "01111000AR1sssss100000nnnnnttttt") +//INST(LDAPRH, "LDAPRH", "0111100010111111110000nnnnnttttt") +//INST(LDADD, "LDADD, LDADDA, LDADDAL, LDADDL", "1-111000AR1sssss000000nnnnnttttt") +//INST(LDCLR, "LDCLR, LDCLRA, LDCLRAL, LDCLRL", "1-111000AR1sssss000100nnnnnttttt") +//INST(LDEOR, "LDEOR, LDEORA, LDEORAL, LDEORL", "1-111000AR1sssss001000nnnnnttttt") +//INST(LDSET, "LDSET, LDSETA, LDSETAL, LDSETL", "1-111000AR1sssss001100nnnnnttttt") +//INST(LDSMAX, "LDSMAX, LDSMAXA, LDSMAXAL, LDSMAXL", "1-111000AR1sssss010000nnnnnttttt") +//INST(LDSMIN, "LDSMIN, LDSMINA, LDSMINAL, LDSMINL", "1-111000AR1sssss010100nnnnnttttt") +//INST(LDUMAX, "LDUMAX, LDUMAXA, LDUMAXAL, LDUMAXL", "1-111000AR1sssss011000nnnnnttttt") +//INST(LDUMIN, "LDUMIN, LDUMINA, LDUMINAL, LDUMINL", "1-111000AR1sssss011100nnnnnttttt") +//INST(SWP, "SWP, SWPA, SWPAL, SWPL", "1-111000AR1sssss100000nnnnnttttt") +//INST(LDAPR, "LDAPR", "1-11100010111111110000nnnnnttttt") +//INST(LD64B, "LD64B", "1111100000111111110100nnnnnttttt") // v8.7 +//INST(ST64B, "ST64B", 
"1111100000111111100100nnnnnttttt") // v8.7 +//INST(ST64BV, "ST64BV", "11111000001sssss101100nnnnnttttt") // v8.7 +//INST(ST64BV0, "ST64BV0", "11111000001sssss101000nnnnnttttt") // v8.7 + +// Loads and stores - Load/Store register (register offset) +INST(STRx_reg, "STRx (register)", "zz111000o01mmmmmxxxS10nnnnnttttt") +INST(LDRx_reg, "LDRx (register)", "zz111000o11mmmmmxxxS10nnnnnttttt") +INST(STR_reg_fpsimd, "STR (register, SIMD&FP)", "zz111100o01mmmmmxxxS10nnnnnttttt") +INST(LDR_reg_fpsimd, "LDR (register, SIMD&FP)", "zz111100o11mmmmmxxxS10nnnnnttttt") + +// Loads and stores - Load/Store memory tags +//INST(STG_1, "STG (post-index)", "11011001001iiiiiiiii01nnnnn11111") // ARMv8.5 +//INST(STG_2, "STG (pre-index)", "11011001001iiiiiiiii11nnnnn11111") // ARMv8.5 +//INST(STG_3, "STG (signed-offset)", "11011001001iiiiiiiii10nnnnn11111") // ARMv8.5 +//INST(LDG, "LDG", "11011001011iiiiiiiii00nnnnnttttt") // ARMv8.5 +//INST(STZG_1, "STZG (post-index)", "11011001011iiiiiiiii01nnnnn11111") // ARMv8.5 +//INST(STZG_2, "STZG (pre-index)", "11011001011iiiiiiiii11nnnnn11111") // ARMv8.5 +//INST(STZG_3, "STZG (signed-offset)", "11011001011iiiiiiiii10nnnnn11111") // ARMv8.5 +//INST(ST2G_1, "ST2G (post-index)", "11011001101iiiiiiiii01nnnnn11111") // ARMv8.5 +//INST(ST2G_2, "ST2G (pre-index)", "11011001101iiiiiiiii11nnnnn11111") // ARMv8.5 +//INST(ST2G_3, "ST2G (signed-offset)", "11011001101iiiiiiiii10nnnnn11111") // ARMv8.5 +//INST(STGV, "STGV", "1101100110100000000000nnnnnttttt") // ARMv8.5 +//INST(STZ2G_1, "STZ2G (post-index)", "11011001111iiiiiiiii01nnnnn11111") // ARMv8.5 +//INST(STZ2G_2, "STZ2G (pre-index)", "11011001111iiiiiiiii11nnnnn11111") // ARMv8.5 +//INST(STZ2G_3, "STZ2G (signed-offset)", "11011001111iiiiiiiii10nnnnn11111") // ARMv8.5 +//INST(LDGV, "LDGV", "1101100111100000000000nnnnnttttt") // ARMv8.5 + +// Loads and stores - Load/Store register (pointer authentication) +//INST(LDRA, "LDRAA, LDRAB", "11111000MS1iiiiiiiiiW1nnnnnttttt") + +// Data Processing - Register 
- 2 source +INST(UDIV, "UDIV", "z0011010110mmmmm000010nnnnnddddd") +INST(SDIV, "SDIV", "z0011010110mmmmm000011nnnnnddddd") +INST(LSLV, "LSLV", "z0011010110mmmmm001000nnnnnddddd") +INST(LSRV, "LSRV", "z0011010110mmmmm001001nnnnnddddd") +INST(ASRV, "ASRV", "z0011010110mmmmm001010nnnnnddddd") +INST(RORV, "RORV", "z0011010110mmmmm001011nnnnnddddd") +INST(CRC32, "CRC32B, CRC32H, CRC32W, CRC32X", "z0011010110mmmmm0100zznnnnnddddd") +INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd") +//INST(PACGA, "PACGA", "10011010110mmmmm001100nnnnnddddd") +//INST(SUBP, "SUBP", "10011010110mmmmm000000nnnnnddddd") // ARMv8.5 +//INST(IRG, "IRG", "10011010110mmmmm000100nnnnnddddd") // ARMv8.5 +//INST(GMI, "GMI", "10011010110mmmmm000101nnnnnddddd") // ARMv8.5 +//INST(SUBPS, "SUBPS", "10111010110mmmmm000000nnnnnddddd") // ARMv8.5 + +// Data Processing - Register - 1 source +INST(RBIT_int, "RBIT", "z101101011000000000000nnnnnddddd") +INST(REV16_int, "REV16", "z101101011000000000001nnnnnddddd") +INST(REV, "REV", "z10110101100000000001onnnnnddddd") +INST(CLZ_int, "CLZ", "z101101011000000000100nnnnnddddd") +INST(CLS_int, "CLS", "z101101011000000000101nnnnnddddd") +INST(REV32_int, "REV32", "1101101011000000000010nnnnnddddd") +//INST(PACDA, "PACDA, PACDZA", "110110101100000100Z010nnnnnddddd") +//INST(PACDB, "PACDB, PACDZB", "110110101100000100Z011nnnnnddddd") +//INST(AUTDA, "AUTDA, AUTDZA", "110110101100000100Z110nnnnnddddd") +//INST(AUTDB, "AUTDB, AUTDZB", "110110101100000100Z111nnnnnddddd") + +// Data Processing - Register - Logical (shifted register) +INST(AND_shift, "AND (shifted register)", "z0001010ss0mmmmmiiiiiinnnnnddddd") +INST(BIC_shift, "BIC (shifted register)", "z0001010ss1mmmmmiiiiiinnnnnddddd") +INST(ORR_shift, "ORR (shifted register)", "z0101010ss0mmmmmiiiiiinnnnnddddd") +INST(ORN_shift, "ORN (shifted register)", "z0101010ss1mmmmmiiiiiinnnnnddddd") +INST(EOR_shift, "EOR (shifted register)", "z1001010ss0mmmmmiiiiiinnnnnddddd") +INST(EON, "EON 
(shifted register)", "z1001010ss1mmmmmiiiiiinnnnnddddd") +INST(ANDS_shift, "ANDS (shifted register)", "z1101010ss0mmmmmiiiiiinnnnnddddd") +INST(BICS, "BICS (shifted register)", "z1101010ss1mmmmmiiiiiinnnnnddddd") + +// Data Processing - Register - Add/Sub (shifted register) +INST(ADD_shift, "ADD (shifted register)", "z0001011ss0mmmmmiiiiiinnnnnddddd") +INST(ADDS_shift, "ADDS (shifted register)", "z0101011ss0mmmmmiiiiiinnnnnddddd") +INST(SUB_shift, "SUB (shifted register)", "z1001011ss0mmmmmiiiiiinnnnnddddd") +INST(SUBS_shift, "SUBS (shifted register)", "z1101011ss0mmmmmiiiiiinnnnnddddd") + +// Data Processing - Register - Add/Sub (extended register) +INST(ADD_ext, "ADD (extended register)", "z0001011001mmmmmxxxiiinnnnnddddd") +INST(ADDS_ext, "ADDS (extended register)", "z0101011001mmmmmxxxiiinnnnnddddd") +INST(SUB_ext, "SUB (extended register)", "z1001011001mmmmmxxxiiinnnnnddddd") +INST(SUBS_ext, "SUBS (extended register)", "z1101011001mmmmmxxxiiinnnnnddddd") + +// Data Processing - Register - Add/Sub (with carry) +INST(ADC, "ADC", "z0011010000mmmmm000000nnnnnddddd") +INST(ADCS, "ADCS", "z0111010000mmmmm000000nnnnnddddd") +INST(SBC, "SBC", "z1011010000mmmmm000000nnnnnddddd") +INST(SBCS, "SBCS", "z1111010000mmmmm000000nnnnnddddd") + +// Data Processing - Register - Conditional compare +INST(CCMN_reg, "CCMN (register)", "z0111010010mmmmmcccc00nnnnn0ffff") +INST(CCMP_reg, "CCMP (register)", "z1111010010mmmmmcccc00nnnnn0ffff") +INST(CCMN_imm, "CCMN (immediate)", "z0111010010iiiiicccc10nnnnn0ffff") +INST(CCMP_imm, "CCMP (immediate)", "z1111010010iiiiicccc10nnnnn0ffff") + +// Data Processing - Register - Conditional select +INST(CSEL, "CSEL", "z0011010100mmmmmcccc00nnnnnddddd") +INST(CSINC, "CSINC", "z0011010100mmmmmcccc01nnnnnddddd") +INST(CSINV, "CSINV", "z1011010100mmmmmcccc00nnnnnddddd") +INST(CSNEG, "CSNEG", "z1011010100mmmmmcccc01nnnnnddddd") + +// Data Processing - Register - 3 source +INST(MADD, "MADD", "z0011011000mmmmm0aaaaannnnnddddd") +INST(MSUB, "MSUB", 
"z0011011000mmmmm1aaaaannnnnddddd") +INST(SMADDL, "SMADDL", "10011011001mmmmm0aaaaannnnnddddd") +INST(SMSUBL, "SMSUBL", "10011011001mmmmm1aaaaannnnnddddd") +INST(SMULH, "SMULH", "10011011010mmmmm011111nnnnnddddd") +INST(UMADDL, "UMADDL", "10011011101mmmmm0aaaaannnnnddddd") +INST(UMSUBL, "UMSUBL", "10011011101mmmmm1aaaaannnnnddddd") +INST(UMULH, "UMULH", "10011011110mmmmm011111nnnnnddddd") + +// Data Processing - FP and SIMD - AES +INST(AESE, "AESE", "0100111000101000010010nnnnnddddd") +INST(AESD, "AESD", "0100111000101000010110nnnnnddddd") +INST(AESMC, "AESMC", "0100111000101000011010nnnnnddddd") +INST(AESIMC, "AESIMC", "0100111000101000011110nnnnnddddd") + +// Data Processing - FP and SIMD - SHA +INST(SHA1C, "SHA1C", "01011110000mmmmm000000nnnnnddddd") +INST(SHA1P, "SHA1P", "01011110000mmmmm000100nnnnnddddd") +INST(SHA1M, "SHA1M", "01011110000mmmmm001000nnnnnddddd") +INST(SHA1SU0, "SHA1SU0", "01011110000mmmmm001100nnnnnddddd") +INST(SHA256H, "SHA256H", "01011110000mmmmm010000nnnnnddddd") +INST(SHA256H2, "SHA256H2", "01011110000mmmmm010100nnnnnddddd") +INST(SHA256SU1, "SHA256SU1", "01011110000mmmmm011000nnnnnddddd") +INST(SHA1H, "SHA1H", "0101111000101000000010nnnnnddddd") +INST(SHA1SU1, "SHA1SU1", "0101111000101000000110nnnnnddddd") +INST(SHA256SU0, "SHA256SU0", "0101111000101000001010nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar copy +INST(DUP_elt_1, "DUP (element)", "01011110000iiiii000001nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar three +//INST(FMULX_vec_1, "FMULX", "01011110010mmmmm000111nnnnnddddd") +INST(FMULX_vec_2, "FMULX", "010111100z1mmmmm110111nnnnnddddd") +INST(FCMEQ_reg_1, "FCMEQ (register)", "01011110010mmmmm001001nnnnnddddd") +INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd") +INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd") +INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd") +INST(FRSQRTS_1, "FRSQRTS", "01011110110mmmmm001111nnnnnddddd") +INST(FRSQRTS_2, "FRSQRTS", 
"010111101z1mmmmm111111nnnnnddddd") +//INST(FCMGE_reg_1, "FCMGE (register)", "01111110010mmmmm001001nnnnnddddd") +INST(FCMGE_reg_2, "FCMGE (register)", "011111100z1mmmmm111001nnnnnddddd") +//INST(FACGE_1, "FACGE", "01111110010mmmmm001011nnnnnddddd") +INST(FACGE_2, "FACGE", "011111100z1mmmmm111011nnnnnddddd") +//INST(FABD_1, "FABD", "01111110110mmmmm000101nnnnnddddd") +INST(FABD_2, "FABD", "011111101z1mmmmm110101nnnnnddddd") +//INST(FCMGT_reg_1, "FCMGT (register)", "01111110110mmmmm001001nnnnnddddd") +INST(FCMGT_reg_2, "FCMGT (register)", "011111101z1mmmmm111001nnnnnddddd") +//INST(FACGT_1, "FACGT", "01111110110mmmmm001011nnnnnddddd") +INST(FACGT_2, "FACGT", "011111101z1mmmmm111011nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar two register misc +//INST(FCVTNS_1, "FCVTNS (vector)", "0101111001111001101010nnnnnddddd") +INST(FCVTNS_2, "FCVTNS (vector)", "010111100z100001101010nnnnnddddd") +//INST(FCVTMS_1, "FCVTMS (vector)", "0101111001111001101110nnnnnddddd") +INST(FCVTMS_2, "FCVTMS (vector)", "010111100z100001101110nnnnnddddd") +//INST(FCVTAS_1, "FCVTAS (vector)", "0101111001111001110010nnnnnddddd") +INST(FCVTAS_2, "FCVTAS (vector)", "010111100z100001110010nnnnnddddd") +//INST(SCVTF_int_1, "SCVTF (vector, integer)", "0101111001111001110110nnnnnddddd") +INST(SCVTF_int_2, "SCVTF (vector, integer)", "010111100z100001110110nnnnnddddd") +//INST(FCMGT_zero_1, "FCMGT (zero)", "0101111011111000110010nnnnnddddd") +INST(FCMGT_zero_2, "FCMGT (zero)", "010111101z100000110010nnnnnddddd") +INST(FCMEQ_zero_1, "FCMEQ (zero)", "0101111011111000110110nnnnnddddd") +INST(FCMEQ_zero_2, "FCMEQ (zero)", "010111101z100000110110nnnnnddddd") +//INST(FCMLT_1, "FCMLT (zero)", "0101111011111000111010nnnnnddddd") +INST(FCMLT_2, "FCMLT (zero)", "010111101z100000111010nnnnnddddd") +//INST(FCVTPS_1, "FCVTPS (vector)", "0101111011111001101010nnnnnddddd") +INST(FCVTPS_2, "FCVTPS (vector)", "010111101z100001101010nnnnnddddd") +//INST(FCVTZS_int_1, "FCVTZS (vector, integer)", 
"0101111011111001101110nnnnnddddd") +INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd") +INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") +INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") +INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") +INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") +//INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd") +INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") +//INST(FCVTMU_1, "FCVTMU (vector)", "0111111001111001101110nnnnnddddd") +INST(FCVTMU_2, "FCVTMU (vector)", "011111100z100001101110nnnnnddddd") +//INST(FCVTAU_1, "FCVTAU (vector)", "0111111001111001110010nnnnnddddd") +INST(FCVTAU_2, "FCVTAU (vector)", "011111100z100001110010nnnnnddddd") +//INST(UCVTF_int_1, "UCVTF (vector, integer)", "0111111001111001110110nnnnnddddd") +INST(UCVTF_int_2, "UCVTF (vector, integer)", "011111100z100001110110nnnnnddddd") +//INST(FCMGE_zero_1, "FCMGE (zero)", "0111111011111000110010nnnnnddddd") +INST(FCMGE_zero_2, "FCMGE (zero)", "011111101z100000110010nnnnnddddd") +//INST(FCMLE_1, "FCMLE (zero)", "0111111011111000110110nnnnnddddd") +INST(FCMLE_2, "FCMLE (zero)", "011111101z100000110110nnnnnddddd") +//INST(FCVTPU_1, "FCVTPU (vector)", "0111111011111001101010nnnnnddddd") +INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd") +//INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd") +INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd") +INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") +INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar three same extra +//INST(SQRDMLAH_vec_1, "SQRDMLAH (vector)", "01111110zz0mmmmm100001nnnnnddddd") +//INST(SQRDMLAH_vec_2, "SQRDMLAH (vector)", "0Q101110zz0mmmmm100001nnnnnddddd") +//INST(SQRDMLSH_vec_1, "SQRDMLSH (vector)", "01111110zz0mmmmm100011nnnnnddddd") 
+//INST(SQRDMLSH_vec_2, "SQRDMLSH (vector)", "0Q101110zz0mmmmm100011nnnnnddddd") + +// Data Processing - FP and SIMD - Scalar two-register misc +INST(SUQADD_1, "SUQADD", "01011110zz100000001110nnnnnddddd") +INST(SQABS_1, "SQABS", "01011110zz100000011110nnnnnddddd") +INST(CMGT_zero_1, "CMGT (zero)", "01011110zz100000100010nnnnnddddd") +INST(CMEQ_zero_1, "CMEQ (zero)", "01011110zz100000100110nnnnnddddd") +INST(CMLT_1, "CMLT (zero)", "01011110zz100000101010nnnnnddddd") +INST(ABS_1, "ABS", "01011110zz100000101110nnnnnddddd") +INST(SQXTN_1, "SQXTN, SQXTN2", "01011110zz100001010010nnnnnddddd") +INST(USQADD_1, "USQADD", "01111110zz100000001110nnnnnddddd") +INST(SQNEG_1, "SQNEG", "01111110zz100000011110nnnnnddddd") +INST(CMGE_zero_1, "CMGE (zero)", "01111110zz100000100010nnnnnddddd") +INST(CMLE_1, "CMLE (zero)", "01111110zz100000100110nnnnnddddd") +INST(NEG_1, "NEG (vector)", "01111110zz100000101110nnnnnddddd") +INST(SQXTUN_1, "SQXTUN, SQXTUN2", "01111110zz100001001010nnnnnddddd") +INST(UQXTN_1, "UQXTN, UQXTN2", "01111110zz100001010010nnnnnddddd") +INST(FCVTXN_1, "FCVTXN, FCVTXN2", "011111100z100001011010nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar pairwise +INST(ADDP_pair, "ADDP (scalar)", "01011110zz110001101110nnnnnddddd") +//INST(FMAXNMP_pair_1, "FMAXNMP (scalar)", "0101111000110000110010nnnnnddddd") +INST(FMAXNMP_pair_2, "FMAXNMP (scalar)", "011111100z110000110010nnnnnddddd") +//INST(FADDP_pair_1, "FADDP (scalar)", "0101111000110000110110nnnnnddddd") +INST(FADDP_pair_2, "FADDP (scalar)", "011111100z110000110110nnnnnddddd") +//INST(FMAXP_pair_1, "FMAXP (scalar)", "0101111000110000111110nnnnnddddd") +INST(FMAXP_pair_2, "FMAXP (scalar)", "011111100z110000111110nnnnnddddd") +//INST(FMINNMP_pair_1, "FMINNMP (scalar)", "0101111010110000110010nnnnnddddd") +INST(FMINNMP_pair_2, "FMINNMP (scalar)", "011111101z110000110010nnnnnddddd") +//INST(FMINP_pair_1, "FMINP (scalar)", "0101111010110000111110nnnnnddddd") +INST(FMINP_pair_2, "FMINP (scalar)", 
"011111101z110000111110nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar three different +//INST(SQDMLAL_vec_1, "SQDMLAL, SQDMLAL2 (vector)", "01011110zz1mmmmm100100nnnnnddddd") +//INST(SQDMLSL_vec_1, "SQDMLSL, SQDMLSL2 (vector)", "01011110zz1mmmmm101100nnnnnddddd") +//INST(SQDMULL_vec_1, "SQDMULL, SQDMULL2 (vector)", "01011110zz1mmmmm110100nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar three same +INST(SQADD_1, "SQADD", "01011110zz1mmmmm000011nnnnnddddd") +INST(SQSUB_1, "SQSUB", "01011110zz1mmmmm001011nnnnnddddd") +INST(CMGT_reg_1, "CMGT (register)", "01011110zz1mmmmm001101nnnnnddddd") +INST(CMGE_reg_1, "CMGE (register)", "01011110zz1mmmmm001111nnnnnddddd") +INST(SSHL_1, "SSHL", "01011110zz1mmmmm010001nnnnnddddd") +INST(SQSHL_reg_1, "SQSHL (register)", "01011110zz1mmmmm010011nnnnnddddd") +INST(SRSHL_1, "SRSHL", "01011110zz1mmmmm010101nnnnnddddd") +//INST(SQRSHL_1, "SQRSHL", "01011110zz1mmmmm010111nnnnnddddd") +INST(ADD_1, "ADD (vector)", "01011110zz1mmmmm100001nnnnnddddd") +INST(CMTST_1, "CMTST", "01011110zz1mmmmm100011nnnnnddddd") +INST(SQDMULH_vec_1, "SQDMULH (vector)", "01011110zz1mmmmm101101nnnnnddddd") +INST(UQADD_1, "UQADD", "01111110zz1mmmmm000011nnnnnddddd") +INST(UQSUB_1, "UQSUB", "01111110zz1mmmmm001011nnnnnddddd") +INST(CMHI_1, "CMHI (register)", "01111110zz1mmmmm001101nnnnnddddd") +INST(CMHS_1, "CMHS (register)", "01111110zz1mmmmm001111nnnnnddddd") +INST(USHL_1, "USHL", "01111110zz1mmmmm010001nnnnnddddd") +INST(UQSHL_reg_1, "UQSHL (register)", "01111110zz1mmmmm010011nnnnnddddd") +INST(URSHL_1, "URSHL", "01111110zz1mmmmm010101nnnnnddddd") +//INST(UQRSHL_1, "UQRSHL", "01111110zz1mmmmm010111nnnnnddddd") +INST(SUB_1, "SUB (vector)", "01111110zz1mmmmm100001nnnnnddddd") +INST(CMEQ_reg_1, "CMEQ (register)", "01111110zz1mmmmm100011nnnnnddddd") +INST(SQRDMULH_vec_1, "SQRDMULH (vector)", "01111110zz1mmmmm101101nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar shift by immediate +INST(SSHR_1, "SSHR", 
"010111110IIIIiii000001nnnnnddddd") +INST(SSRA_1, "SSRA", "010111110IIIIiii000101nnnnnddddd") +INST(SRSHR_1, "SRSHR", "010111110IIIIiii001001nnnnnddddd") +INST(SRSRA_1, "SRSRA", "010111110IIIIiii001101nnnnnddddd") +INST(SHL_1, "SHL", "010111110IIIIiii010101nnnnnddddd") +INST(SQSHL_imm_1, "SQSHL (immediate)", "010111110IIIIiii011101nnnnnddddd") +INST(SQSHRN_1, "SQSHRN, SQSHRN2", "010111110IIIIiii100101nnnnnddddd") +//INST(SQRSHRN_1, "SQRSHRN, SQRSHRN2", "010111110IIIIiii100111nnnnnddddd") +INST(SCVTF_fix_1, "SCVTF (vector, fixed-point)", "010111110IIIIiii111001nnnnnddddd") +INST(FCVTZS_fix_1, "FCVTZS (vector, fixed-point)", "010111110IIIIiii111111nnnnnddddd") +INST(USHR_1, "USHR", "011111110IIIIiii000001nnnnnddddd") +INST(USRA_1, "USRA", "011111110IIIIiii000101nnnnnddddd") +INST(URSHR_1, "URSHR", "011111110IIIIiii001001nnnnnddddd") +INST(URSRA_1, "URSRA", "011111110IIIIiii001101nnnnnddddd") +INST(SRI_1, "SRI", "011111110IIIIiii010001nnnnnddddd") +INST(SLI_1, "SLI", "011111110IIIIiii010101nnnnnddddd") +INST(SQSHLU_1, "SQSHLU", "011111110IIIIiii011001nnnnnddddd") +INST(UQSHL_imm_1, "UQSHL (immediate)", "011111110IIIIiii011101nnnnnddddd") +INST(SQSHRUN_1, "SQSHRUN, SQSHRUN2", "011111110IIIIiii100001nnnnnddddd") +//INST(SQRSHRUN_1, "SQRSHRUN, SQRSHRUN2", "011111110IIIIiii100011nnnnnddddd") +INST(UQSHRN_1, "UQSHRN, UQSHRN2", "011111110IIIIiii100101nnnnnddddd") +//INST(UQRSHRN_1, "UQRSHRN, UQRSHRN2", "011111110IIIIiii100111nnnnnddddd") +INST(UCVTF_fix_1, "UCVTF (vector, fixed-point)", "011111110IIIIiii111001nnnnnddddd") +INST(FCVTZU_fix_1, "FCVTZU (vector, fixed-point)", "011111110IIIIiii111111nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Scalar x indexed element +//INST(SQDMLAL_elt_1, "SQDMLAL, SQDMLAL2 (by element)", "01011111zzLMmmmm0011H0nnnnnddddd") +//INST(SQDMLSL_elt_1, "SQDMLSL, SQDMLSL2 (by element)", "01011111zzLMmmmm0111H0nnnnnddddd") +INST(SQDMULL_elt_1, "SQDMULL, SQDMULL2 (by element)", "01011111zzLMmmmm1011H0nnnnnddddd") +INST(SQDMULH_elt_1, 
"SQDMULH (by element)", "01011111zzLMmmmm1100H0nnnnnddddd") +INST(SQRDMULH_elt_1, "SQRDMULH (by element)", "01011111zzLMmmmm1101H0nnnnnddddd") +INST(FMLA_elt_1, "FMLA (by element)", "0101111100LMmmmm0001H0nnnnnddddd") +INST(FMLA_elt_2, "FMLA (by element)", "010111111zLMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_1, "FMLS (by element)", "0101111100LMmmmm0101H0nnnnnddddd") +INST(FMLS_elt_2, "FMLS (by element)", "010111111zLMmmmm0101H0nnnnnddddd") +//INST(FMUL_elt_1, "FMUL (by element)", "0101111100LMmmmm1001H0nnnnnddddd") +INST(FMUL_elt_2, "FMUL (by element)", "010111111zLMmmmm1001H0nnnnnddddd") +//INST(SQRDMLAH_elt_1, "SQRDMLAH (by element)", "01111111zzLMmmmm1101H0nnnnnddddd") +//INST(SQRDMLSH_elt_1, "SQRDMLSH (by element)", "01111111zzLMmmmm1111H0nnnnnddddd") +//INST(FMULX_elt_1, "FMULX (by element)", "0111111100LMmmmm1001H0nnnnnddddd") +INST(FMULX_elt_2, "FMULX (by element)", "011111111zLMmmmm1001H0nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Table Lookup +INST(TBL, "TBL", "0Q001110000mmmmm0LL000nnnnnddddd") +INST(TBX, "TBX", "0Q001110000mmmmm0LL100nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Permute +INST(UZP1, "UZP1", "0Q001110zz0mmmmm000110nnnnnddddd") +INST(TRN1, "TRN1", "0Q001110zz0mmmmm001010nnnnnddddd") +INST(ZIP1, "ZIP1", "0Q001110zz0mmmmm001110nnnnnddddd") +INST(UZP2, "UZP2", "0Q001110zz0mmmmm010110nnnnnddddd") +INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd") +INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Extract +INST(EXT, "EXT", "0Q101110000mmmmm0iiii0nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Copy +INST(DUP_elt_2, "DUP (element)", "0Q001110000iiiii000001nnnnnddddd") +INST(DUP_gen, "DUP (general)", "0Q001110000iiiii000011nnnnnddddd") +INST(SMOV, "SMOV", "0Q001110000iiiii001011nnnnnddddd") +INST(UMOV, "UMOV", "0Q001110000iiiii001111nnnnnddddd") +INST(INS_gen, "INS (general)", "01001110000iiiii000111nnnnnddddd") +INST(INS_elt, "INS (element)", 
"01101110000iiiii0iiii1nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Three same +//INST(FMULX_vec_3, "FMULX", "0Q001110010mmmmm000111nnnnnddddd") +INST(FCMEQ_reg_3, "FCMEQ (register)", "0Q001110010mmmmm001001nnnnnddddd") +INST(FRECPS_3, "FRECPS", "0Q001110010mmmmm001111nnnnnddddd") +INST(FRSQRTS_3, "FRSQRTS", "0Q001110110mmmmm001111nnnnnddddd") +//INST(FCMGE_reg_3, "FCMGE (register)", "0Q101110010mmmmm001001nnnnnddddd") +//INST(FACGE_3, "FACGE", "0Q101110010mmmmm001011nnnnnddddd") +//INST(FABD_3, "FABD", "0Q101110110mmmmm000101nnnnnddddd") +//INST(FCMGT_reg_3, "FCMGT (register)", "0Q101110110mmmmm001001nnnnnddddd") +//INST(FACGT_3, "FACGT", "0Q101110110mmmmm001011nnnnnddddd") +//INST(FMAXNM_1, "FMAXNM (vector)", "0Q001110010mmmmm000001nnnnnddddd") +INST(FMLA_vec_1, "FMLA (vector)", "0Q001110010mmmmm000011nnnnnddddd") +//INST(FADD_1, "FADD (vector)", "0Q001110010mmmmm000101nnnnnddddd") +//INST(FMAX_1, "FMAX (vector)", "0Q001110010mmmmm001101nnnnnddddd") +//INST(FMINNM_1, "FMINNM (vector)", "0Q001110110mmmmm000001nnnnnddddd") +INST(FMLS_vec_1, "FMLS (vector)", "0Q001110110mmmmm000011nnnnnddddd") +//INST(FSUB_1, "FSUB (vector)", "0Q001110110mmmmm000101nnnnnddddd") +//INST(FMIN_1, "FMIN (vector)", "0Q001110110mmmmm001101nnnnnddddd") +//INST(FMAXNMP_vec_1, "FMAXNMP (vector)", "0Q101110010mmmmm000001nnnnnddddd") +//INST(FADDP_vec_1, "FADDP (vector)", "0Q101110010mmmmm000101nnnnnddddd") +//INST(FMUL_vec_1, "FMUL (vector)", "0Q101110010mmmmm000111nnnnnddddd") +//INST(FMAXP_vec_1, "FMAXP (vector)", "0Q101110010mmmmm001101nnnnnddddd") +//INST(FDIV_1, "FDIV (vector)", "0Q101110010mmmmm001111nnnnnddddd") +//INST(FMINNMP_vec_1, "FMINNMP (vector)", "0Q101110110mmmmm000001nnnnnddddd") +//INST(FMINP_vec_1, "FMINP (vector)", "0Q101110110mmmmm001101nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Three same extra +//INST(SMMLA_vec, "SMMLA", "01001110100mmmmm101001nnnnnddddd") // v8.6 +//INST(UMMLA_vec, "UMMLA", "01101110100mmmmm101001nnnnnddddd") // v8.6 
+//INST(USMMLA_vec, "USMMLA", "01001110100mmmmm101011nnnnnddddd") // v8.6 +//INST(SUDOT_element, "SUDOT (by element)", "0Q00111100LMmmmm1111H0nnnnnddddd") // v8.6 +//INST(USDOT_element, "USDOT (by_element)", "0Q00111110LMmmmm1111H0nnnnnddddd") // v8.6 +//INST(USDOT_vec, "USDOT (vector)", "0Q001110100mmmmm100111nnnnnddddd") // v8.6 +INST(SDOT_vec, "SDOT (vector)", "0Q001110zz0mmmmm100101nnnnnddddd") +INST(UDOT_vec, "UDOT (vector)", "0Q101110zz0mmmmm100101nnnnnddddd") +INST(FCMLA_vec, "FCMLA", "0Q101110zz0mmmmm110rr1nnnnnddddd") +INST(FCADD_vec, "FCADD", "0Q101110zz0mmmmm111r01nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD Two-register misc +INST(REV64_asimd, "REV64", "0Q001110zz100000000010nnnnnddddd") +INST(REV16_asimd, "REV16 (vector)", "0Q001110zz100000000110nnnnnddddd") +INST(SADDLP, "SADDLP", "0Q001110zz100000001010nnnnnddddd") +INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd") +INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd") +INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd") +INST(SADALP, "SADALP", "0Q001110zz100000011010nnnnnddddd") +INST(SQABS_2, "SQABS", "0Q001110zz100000011110nnnnnddddd") +INST(CMGT_zero_2, "CMGT (zero)", "0Q001110zz100000100010nnnnnddddd") +INST(CMEQ_zero_2, "CMEQ (zero)", "0Q001110zz100000100110nnnnnddddd") +INST(CMLT_2, "CMLT (zero)", "0Q001110zz100000101010nnnnnddddd") +INST(ABS_2, "ABS", "0Q001110zz100000101110nnnnnddddd") +INST(XTN, "XTN, XTN2", "0Q001110zz100001001010nnnnnddddd") +INST(SQXTN_2, "SQXTN, SQXTN2", "0Q001110zz100001010010nnnnnddddd") +INST(FCVTN, "FCVTN, FCVTN2", "0Q0011100z100001011010nnnnnddddd") +INST(FCVTL, "FCVTL, FCVTL2", "0Q0011100z100001011110nnnnnddddd") +INST(FRINTN_1, "FRINTN (vector)", "0Q00111001111001100010nnnnnddddd") +INST(FRINTN_2, "FRINTN (vector)", "0Q0011100z100001100010nnnnnddddd") +INST(FRINTM_1, "FRINTM (vector)", "0Q00111001111001100110nnnnnddddd") +INST(FRINTM_2, "FRINTM (vector)", "0Q0011100z100001100110nnnnnddddd") +//INST(FCVTNS_3, "FCVTNS (vector)", 
"0Q00111001111001101010nnnnnddddd") +INST(FCVTNS_4, "FCVTNS (vector)", "0Q0011100z100001101010nnnnnddddd") +//INST(FCVTMS_3, "FCVTMS (vector)", "0Q00111001111001101110nnnnnddddd") +INST(FCVTMS_4, "FCVTMS (vector)", "0Q0011100z100001101110nnnnnddddd") +//INST(FCVTAS_3, "FCVTAS (vector)", "0Q00111001111001110010nnnnnddddd") +INST(FCVTAS_4, "FCVTAS (vector)", "0Q0011100z100001110010nnnnnddddd") +//INST(SCVTF_int_3, "SCVTF (vector, integer)", "0Q00111001111001110110nnnnnddddd") +INST(SCVTF_int_4, "SCVTF (vector, integer)", "0Q0011100z100001110110nnnnnddddd") +//INST(FCMGT_zero_3, "FCMGT (zero)", "0Q00111011111000110010nnnnnddddd") +INST(FCMGT_zero_4, "FCMGT (zero)", "0Q0011101z100000110010nnnnnddddd") +INST(FCMEQ_zero_3, "FCMEQ (zero)", "0Q00111011111000110110nnnnnddddd") +INST(FCMEQ_zero_4, "FCMEQ (zero)", "0Q0011101z100000110110nnnnnddddd") +//INST(FCMLT_3, "FCMLT (zero)", "0Q00111011111000111010nnnnnddddd") +INST(FCMLT_4, "FCMLT (zero)", "0Q0011101z100000111010nnnnnddddd") +INST(FABS_1, "FABS (vector)", "0Q00111011111000111110nnnnnddddd") +INST(FABS_2, "FABS (vector)", "0Q0011101z100000111110nnnnnddddd") +INST(FRINTP_1, "FRINTP (vector)", "0Q00111011111001100010nnnnnddddd") +INST(FRINTP_2, "FRINTP (vector)", "0Q0011101z100001100010nnnnnddddd") +INST(FRINTZ_1, "FRINTZ (vector)", "0Q00111011111001100110nnnnnddddd") +INST(FRINTZ_2, "FRINTZ (vector)", "0Q0011101z100001100110nnnnnddddd") +//INST(FCVTPS_3, "FCVTPS (vector)", "0Q00111011111001101010nnnnnddddd") +INST(FCVTPS_4, "FCVTPS (vector)", "0Q0011101z100001101010nnnnnddddd") +//INST(FCVTZS_int_3, "FCVTZS (vector, integer)", "0Q00111011111001101110nnnnnddddd") +INST(FCVTZS_int_4, "FCVTZS (vector, integer)", "0Q0011101z100001101110nnnnnddddd") +INST(URECPE, "URECPE", "0Q0011101z100001110010nnnnnddddd") +INST(FRECPE_3, "FRECPE", "0Q00111011111001110110nnnnnddddd") +INST(FRECPE_4, "FRECPE", "0Q0011101z100001110110nnnnnddddd") +INST(REV32_asimd, "REV32 (vector)", "0Q101110zz100000000010nnnnnddddd") +INST(UADDLP, "UADDLP", 
"0Q101110zz100000001010nnnnnddddd") +INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd") +INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd") +INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd") +INST(SQNEG_2, "SQNEG", "0Q101110zz100000011110nnnnnddddd") +INST(CMGE_zero_2, "CMGE (zero)", "0Q101110zz100000100010nnnnnddddd") +INST(CMLE_2, "CMLE (zero)", "0Q101110zz100000100110nnnnnddddd") +INST(NEG_2, "NEG (vector)", "0Q101110zz100000101110nnnnnddddd") +INST(SQXTUN_2, "SQXTUN, SQXTUN2", "0Q101110zz100001001010nnnnnddddd") +INST(SHLL, "SHLL, SHLL2", "0Q101110zz100001001110nnnnnddddd") +INST(UQXTN_2, "UQXTN, UQXTN2", "0Q101110zz100001010010nnnnnddddd") +INST(FCVTXN_2, "FCVTXN, FCVTXN2", "0Q1011100z100001011010nnnnnddddd") +INST(FRINTA_1, "FRINTA (vector)", "0Q10111001111001100010nnnnnddddd") +INST(FRINTA_2, "FRINTA (vector)", "0Q1011100z100001100010nnnnnddddd") +INST(FRINTX_1, "FRINTX (vector)", "0Q10111001111001100110nnnnnddddd") +INST(FRINTX_2, "FRINTX (vector)", "0Q1011100z100001100110nnnnnddddd") +//INST(FCVTNU_3, "FCVTNU (vector)", "0Q10111001111001101010nnnnnddddd") +INST(FCVTNU_4, "FCVTNU (vector)", "0Q1011100z100001101010nnnnnddddd") +//INST(FCVTMU_3, "FCVTMU (vector)", "0Q10111001111001101110nnnnnddddd") +INST(FCVTMU_4, "FCVTMU (vector)", "0Q1011100z100001101110nnnnnddddd") +//INST(FCVTAU_3, "FCVTAU (vector)", "0Q10111001111001110010nnnnnddddd") +INST(FCVTAU_4, "FCVTAU (vector)", "0Q1011100z100001110010nnnnnddddd") +//INST(UCVTF_int_3, "UCVTF (vector, integer)", "0Q10111001111001110110nnnnnddddd") +INST(UCVTF_int_4, "UCVTF (vector, integer)", "0Q1011100z100001110110nnnnnddddd") +INST(NOT, "NOT", "0Q10111000100000010110nnnnnddddd") +INST(RBIT_asimd, "RBIT (vector)", "0Q10111001100000010110nnnnnddddd") +INST(FNEG_1, "FNEG (vector)", "0Q10111011111000111110nnnnnddddd") +INST(FNEG_2, "FNEG (vector)", "0Q1011101z100000111110nnnnnddddd") +INST(FRINTI_1, "FRINTI (vector)", "0Q10111011111001100110nnnnnddddd") +INST(FRINTI_2, "FRINTI 
(vector)", "0Q1011101z100001100110nnnnnddddd") +//INST(FCMGE_zero_3, "FCMGE (zero)", "0Q10111011111000110010nnnnnddddd") +INST(FCMGE_zero_4, "FCMGE (zero)", "0Q1011101z100000110010nnnnnddddd") +//INST(FCMLE_3, "FCMLE (zero)", "0Q10111011111000110110nnnnnddddd") +INST(FCMLE_4, "FCMLE (zero)", "0Q1011101z100000110110nnnnnddddd") +//INST(FCVTPU_3, "FCVTPU (vector)", "0Q10111011111001101010nnnnnddddd") +INST(FCVTPU_4, "FCVTPU (vector)", "0Q1011101z100001101010nnnnnddddd") +//INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd") +INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") +INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") +INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") +INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") +//INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd") +INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") +//INST(FRINT32X_1, "FRINT32X (vector)", "0Q1011100z100001111110nnnnnddddd") // ARMv8.5 +//INST(FRINT64X_1, "FRINT64X (vector)", "0Q1011100z100001111010nnnnnddddd") // ARMv8.5 +//INST(FRINT32Z_1, "FRINT32Z (vector)", "0Q0011100z100001111010nnnnnddddd") // ARMv8.5 +//INST(FRINT64Z_1, "FRINT64Z (vector)", "0Q0011100z100001111110nnnnnddddd") // ARMv8.5 + +// Data Processing - FP and SIMD - SIMD across lanes +INST(SADDLV, "SADDLV", "0Q001110zz110000001110nnnnnddddd") +INST(SMAXV, "SMAXV", "0Q001110zz110000101010nnnnnddddd") +INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") +INST(ADDV, "ADDV", "0Q001110zz110001101110nnnnnddddd") +//INST(FMAXNMV_1, "FMAXNMV", "0Q00111000110000110010nnnnnddddd") +INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") +//INST(FMAXV_1, "FMAXV", "0Q00111000110000111110nnnnnddddd") +INST(FMAXV_2, "FMAXV", "0Q1011100z110000111110nnnnnddddd") +//INST(FMINNMV_1, "FMINNMV", "0Q00111010110000110010nnnnnddddd") +INST(FMINNMV_2, "FMINNMV", "0Q1011101z110000110010nnnnnddddd") 
+//INST(FMINV_1, "FMINV", "0Q00111010110000111110nnnnnddddd") +INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") +INST(UADDLV, "UADDLV", "0Q101110zz110000001110nnnnnddddd") +INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") +INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD three different +INST(SADDL, "SADDL, SADDL2", "0Q001110zz1mmmmm000000nnnnnddddd") +INST(SADDW, "SADDW, SADDW2", "0Q001110zz1mmmmm000100nnnnnddddd") +INST(SSUBL, "SSUBL, SSUBL2", "0Q001110zz1mmmmm001000nnnnnddddd") +INST(SSUBW, "SSUBW, SSUBW2", "0Q001110zz1mmmmm001100nnnnnddddd") +INST(ADDHN, "ADDHN, ADDHN2", "0Q001110zz1mmmmm010000nnnnnddddd") +INST(SABAL, "SABAL, SABAL2", "0Q001110zz1mmmmm010100nnnnnddddd") +INST(SUBHN, "SUBHN, SUBHN2", "0Q001110zz1mmmmm011000nnnnnddddd") +INST(SABDL, "SABDL, SABDL2", "0Q001110zz1mmmmm011100nnnnnddddd") +INST(SMLAL_vec, "SMLAL, SMLAL2 (vector)", "0Q001110zz1mmmmm100000nnnnnddddd") +INST(SMLSL_vec, "SMLSL, SMLSL2 (vector)", "0Q001110zz1mmmmm101000nnnnnddddd") +INST(SMULL_vec, "SMULL, SMULL2 (vector)", "0Q001110zz1mmmmm110000nnnnnddddd") +INST(PMULL, "PMULL, PMULL2", "0Q001110zz1mmmmm111000nnnnnddddd") +INST(UADDL, "UADDL, UADDL2", "0Q101110zz1mmmmm000000nnnnnddddd") +INST(UADDW, "UADDW, UADDW2", "0Q101110zz1mmmmm000100nnnnnddddd") +INST(USUBL, "USUBL, USUBL2", "0Q101110zz1mmmmm001000nnnnnddddd") +INST(USUBW, "USUBW, USUBW2", "0Q101110zz1mmmmm001100nnnnnddddd") +INST(RADDHN, "RADDHN, RADDHN2", "0Q101110zz1mmmmm010000nnnnnddddd") +INST(UABAL, "UABAL, UABAL2", "0Q101110zz1mmmmm010100nnnnnddddd") +INST(RSUBHN, "RSUBHN, RSUBHN2", "0Q101110zz1mmmmm011000nnnnnddddd") +INST(UABDL, "UABDL, UABDL2", "0Q101110zz1mmmmm011100nnnnnddddd") +INST(UMLAL_vec, "UMLAL, UMLAL2 (vector)", "0Q101110zz1mmmmm100000nnnnnddddd") +INST(UMLSL_vec, "UMLSL, UMLSL2 (vector)", "0Q101110zz1mmmmm101000nnnnnddddd") +INST(UMULL_vec, "UMULL, UMULL2 (vector)", "0Q101110zz1mmmmm110000nnnnnddddd") +//INST(SQDMLAL_vec_2, "SQDMLAL, 
SQDMLAL2 (vector)", "0Q001110zz1mmmmm100100nnnnnddddd") +//INST(SQDMLSL_vec_2, "SQDMLSL, SQDMLSL2 (vector)", "0Q001110zz1mmmmm101100nnnnnddddd") +INST(SQDMULL_vec_2, "SQDMULL, SQDMULL2 (vector)", "0Q001110zz1mmmmm110100nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD three same +INST(SHADD, "SHADD", "0Q001110zz1mmmmm000001nnnnnddddd") +INST(SQADD_2, "SQADD", "0Q001110zz1mmmmm000011nnnnnddddd") +INST(SRHADD, "SRHADD", "0Q001110zz1mmmmm000101nnnnnddddd") +INST(SHSUB, "SHSUB", "0Q001110zz1mmmmm001001nnnnnddddd") +INST(SQSUB_2, "SQSUB", "0Q001110zz1mmmmm001011nnnnnddddd") +INST(CMGT_reg_2, "CMGT (register)", "0Q001110zz1mmmmm001101nnnnnddddd") +INST(CMGE_reg_2, "CMGE (register)", "0Q001110zz1mmmmm001111nnnnnddddd") +INST(SSHL_2, "SSHL", "0Q001110zz1mmmmm010001nnnnnddddd") +INST(SQSHL_reg_2, "SQSHL (register)", "0Q001110zz1mmmmm010011nnnnnddddd") +INST(SRSHL_2, "SRSHL", "0Q001110zz1mmmmm010101nnnnnddddd") +//INST(SQRSHL_2, "SQRSHL", "0Q001110zz1mmmmm010111nnnnnddddd") +INST(SMAX, "SMAX", "0Q001110zz1mmmmm011001nnnnnddddd") +INST(SMIN, "SMIN", "0Q001110zz1mmmmm011011nnnnnddddd") +INST(SABD, "SABD", "0Q001110zz1mmmmm011101nnnnnddddd") +INST(SABA, "SABA", "0Q001110zz1mmmmm011111nnnnnddddd") +INST(ADD_vector, "ADD (vector)", "0Q001110zz1mmmmm100001nnnnnddddd") +INST(CMTST_2, "CMTST", "0Q001110zz1mmmmm100011nnnnnddddd") +INST(MLA_vec, "MLA (vector)", "0Q001110zz1mmmmm100101nnnnnddddd") +INST(MUL_vec, "MUL (vector)", "0Q001110zz1mmmmm100111nnnnnddddd") +INST(SMAXP, "SMAXP", "0Q001110zz1mmmmm101001nnnnnddddd") +INST(SMINP, "SMINP", "0Q001110zz1mmmmm101011nnnnnddddd") +INST(SQDMULH_vec_2, "SQDMULH (vector)", "0Q001110zz1mmmmm101101nnnnnddddd") +INST(ADDP_vec, "ADDP (vector)", "0Q001110zz1mmmmm101111nnnnnddddd") +INST(FMAXNM_2, "FMAXNM (vector)", "0Q0011100z1mmmmm110001nnnnnddddd") +INST(FMLA_vec_2, "FMLA (vector)", "0Q0011100z1mmmmm110011nnnnnddddd") +INST(FADD_2, "FADD (vector)", "0Q0011100z1mmmmm110101nnnnnddddd") +INST(FMAX_2, "FMAX (vector)", 
"0Q0011100z1mmmmm111101nnnnnddddd") +INST(FMULX_vec_4, "FMULX", "0Q0011100z1mmmmm110111nnnnnddddd") +INST(FCMEQ_reg_4, "FCMEQ (register)", "0Q0011100z1mmmmm111001nnnnnddddd") +//INST(FMLAL_vec_1, "FMLAL, FMLAL2 (vector)", "0Q0011100z1mmmmm111011nnnnnddddd") +INST(FRECPS_4, "FRECPS", "0Q0011100z1mmmmm111111nnnnnddddd") +INST(AND_asimd, "AND (vector)", "0Q001110001mmmmm000111nnnnnddddd") +INST(BIC_asimd_reg, "BIC (vector, register)", "0Q001110011mmmmm000111nnnnnddddd") +INST(FMINNM_2, "FMINNM (vector)", "0Q0011101z1mmmmm110001nnnnnddddd") +INST(FMLS_vec_2, "FMLS (vector)", "0Q0011101z1mmmmm110011nnnnnddddd") +INST(FSUB_2, "FSUB (vector)", "0Q0011101z1mmmmm110101nnnnnddddd") +//INST(FMLSL_vec_1, "FMLSL, FMLSL2 (vector)", "0Q0011101z1mmmmm111011nnnnnddddd") +INST(FMIN_2, "FMIN (vector)", "0Q0011101z1mmmmm111101nnnnnddddd") +INST(FRSQRTS_4, "FRSQRTS", "0Q0011101z1mmmmm111111nnnnnddddd") +INST(ORR_asimd_reg, "ORR (vector, register)", "0Q001110101mmmmm000111nnnnnddddd") +INST(ORN_asimd, "ORN (vector)", "0Q001110111mmmmm000111nnnnnddddd") +INST(UHADD, "UHADD", "0Q101110zz1mmmmm000001nnnnnddddd") +INST(UQADD_2, "UQADD", "0Q101110zz1mmmmm000011nnnnnddddd") +INST(URHADD, "URHADD", "0Q101110zz1mmmmm000101nnnnnddddd") +INST(UHSUB, "UHSUB", "0Q101110zz1mmmmm001001nnnnnddddd") +INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd") +INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd") +INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd") +INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd") +INST(UQSHL_reg_2, "UQSHL (register)", "0Q101110zz1mmmmm010011nnnnnddddd") +INST(URSHL_2, "URSHL", "0Q101110zz1mmmmm010101nnnnnddddd") +//INST(UQRSHL_2, "UQRSHL", "0Q101110zz1mmmmm010111nnnnnddddd") +INST(UMAX, "UMAX", "0Q101110zz1mmmmm011001nnnnnddddd") +INST(UMIN, "UMIN", "0Q101110zz1mmmmm011011nnnnnddddd") +INST(UABD, "UABD", "0Q101110zz1mmmmm011101nnnnnddddd") +INST(UABA, "UABA", "0Q101110zz1mmmmm011111nnnnnddddd") +INST(SUB_2, "SUB (vector)", 
"0Q101110zz1mmmmm100001nnnnnddddd") +INST(CMEQ_reg_2, "CMEQ (register)", "0Q101110zz1mmmmm100011nnnnnddddd") +INST(MLS_vec, "MLS (vector)", "0Q101110zz1mmmmm100101nnnnnddddd") +INST(PMUL, "PMUL", "0Q101110zz1mmmmm100111nnnnnddddd") +INST(UMAXP, "UMAXP", "0Q101110zz1mmmmm101001nnnnnddddd") +INST(UMINP, "UMINP", "0Q101110zz1mmmmm101011nnnnnddddd") +INST(SQRDMULH_vec_2, "SQRDMULH (vector)", "0Q101110zz1mmmmm101101nnnnnddddd") +INST(FMAXNMP_vec_2, "FMAXNMP (vector)", "0Q1011100z1mmmmm110001nnnnnddddd") +//INST(FMLAL_vec_2, "FMLAL, FMLAL2 (vector)", "0Q1011100z1mmmmm110011nnnnnddddd") +INST(FADDP_vec_2, "FADDP (vector)", "0Q1011100z1mmmmm110101nnnnnddddd") +INST(FMUL_vec_2, "FMUL (vector)", "0Q1011100z1mmmmm110111nnnnnddddd") +INST(FCMGE_reg_4, "FCMGE (register)", "0Q1011100z1mmmmm111001nnnnnddddd") +INST(FACGE_4, "FACGE", "0Q1011100z1mmmmm111011nnnnnddddd") +INST(FMAXP_vec_2, "FMAXP (vector)", "0Q1011100z1mmmmm111101nnnnnddddd") +INST(FDIV_2, "FDIV (vector)", "0Q1011100z1mmmmm111111nnnnnddddd") +INST(EOR_asimd, "EOR (vector)", "0Q101110001mmmmm000111nnnnnddddd") +INST(BSL, "BSL", "0Q101110011mmmmm000111nnnnnddddd") +INST(FMINNMP_vec_2, "FMINNMP (vector)", "0Q1011101z1mmmmm110001nnnnnddddd") +//INST(FMLSL_vec_2, "FMLSL, FMLSL2 (vector)", "0Q1011101z1mmmmm110011nnnnnddddd") +INST(FABD_4, "FABD", "0Q1011101z1mmmmm110101nnnnnddddd") +INST(FCMGT_reg_4, "FCMGT (register)", "0Q1011101z1mmmmm111001nnnnnddddd") +INST(FACGT_4, "FACGT", "0Q1011101z1mmmmm111011nnnnnddddd") +INST(FMINP_vec_2, "FMINP (vector)", "0Q1011101z1mmmmm111101nnnnnddddd") +INST(BIT, "BIT", "0Q101110101mmmmm000111nnnnnddddd") +INST(BIF, "BIF", "0Q101110111mmmmm000111nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD modified immediate +INST(MOVI, "MOVI, MVNI, ORR, BIC (vector, immediate)", "0Qo0111100000abcmmmm01defghddddd") +INST(FMOV_2, "FMOV (vector, immediate)", "0Qo0111100000abc111101defghddddd") +INST(FMOV_3, "FMOV (vector, immediate)", "0Q00111100000abc111111defghddddd") +INST(UnallocatedEncoding, 
"Unallocated SIMD modified immediate", "0--0111100000-------11----------") + +// Data Processing - FP and SIMD - SIMD Shift by immediate +INST(SSHR_2, "SSHR", "0Q0011110IIIIiii000001nnnnnddddd") +INST(SSRA_2, "SSRA", "0Q0011110IIIIiii000101nnnnnddddd") +INST(SRSHR_2, "SRSHR", "0Q0011110IIIIiii001001nnnnnddddd") +INST(SRSRA_2, "SRSRA", "0Q0011110IIIIiii001101nnnnnddddd") +INST(SHL_2, "SHL", "0Q0011110IIIIiii010101nnnnnddddd") +INST(SQSHL_imm_2, "SQSHL (immediate)", "0Q0011110IIIIiii011101nnnnnddddd") +INST(SHRN, "SHRN, SHRN2", "0Q0011110IIIIiii100001nnnnnddddd") +INST(RSHRN, "RSHRN, RSHRN2", "0Q0011110IIIIiii100011nnnnnddddd") +INST(SQSHRN_2, "SQSHRN, SQSHRN2", "0Q0011110IIIIiii100101nnnnnddddd") +INST(SQRSHRN_2, "SQRSHRN, SQRSHRN2", "0Q0011110IIIIiii100111nnnnnddddd") +INST(SSHLL, "SSHLL, SSHLL2", "0Q0011110IIIIiii101001nnnnnddddd") +INST(SCVTF_fix_2, "SCVTF (vector, fixed-point)", "0Q0011110IIIIiii111001nnnnnddddd") +INST(FCVTZS_fix_2, "FCVTZS (vector, fixed-point)", "0Q0011110IIIIiii111111nnnnnddddd") +INST(USHR_2, "USHR", "0Q1011110IIIIiii000001nnnnnddddd") +INST(USRA_2, "USRA", "0Q1011110IIIIiii000101nnnnnddddd") +INST(URSHR_2, "URSHR", "0Q1011110IIIIiii001001nnnnnddddd") +INST(URSRA_2, "URSRA", "0Q1011110IIIIiii001101nnnnnddddd") +INST(SRI_2, "SRI", "0Q1011110IIIIiii010001nnnnnddddd") +INST(SLI_2, "SLI", "0Q1011110IIIIiii010101nnnnnddddd") +INST(SQSHLU_2, "SQSHLU", "0Q1011110IIIIiii011001nnnnnddddd") +INST(UQSHL_imm_2, "UQSHL (immediate)", "0Q1011110IIIIiii011101nnnnnddddd") +INST(SQSHRUN_2, "SQSHRUN, SQSHRUN2", "0Q1011110IIIIiii100001nnnnnddddd") +INST(SQRSHRUN_2, "SQRSHRUN, SQRSHRUN2", "0Q1011110IIIIiii100011nnnnnddddd") +INST(UQSHRN_2, "UQSHRN, UQSHRN2", "0Q1011110IIIIiii100101nnnnnddddd") +INST(UQRSHRN_2, "UQRSHRN, UQRSHRN2", "0Q1011110IIIIiii100111nnnnnddddd") +INST(USHLL, "USHLL, USHLL2", "0Q1011110IIIIiii101001nnnnnddddd") +INST(UCVTF_fix_2, "UCVTF (vector, fixed-point)", "0Q1011110IIIIiii111001nnnnnddddd") +INST(FCVTZU_fix_2, "FCVTZU (vector, 
fixed-point)", "0Q1011110IIIIiii111111nnnnnddddd") + +// Data Processing - FP and SIMD - SIMD vector x indexed element +INST(SMLAL_elt, "SMLAL, SMLAL2 (by element)", "0Q001111zzLMmmmm0010H0nnnnnddddd") +//INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd") +INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") +//INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd") +INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") +INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd") +INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd") +INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd") +INST(SQRDMULH_elt_2, "SQRDMULH (by element)", "0Q001111zzLMmmmm1101H0nnnnnddddd") +INST(SDOT_elt, "SDOT (by element)", "0Q001111zzLMmmmm1110H0nnnnnddddd") +INST(FMLA_elt_3, "FMLA (by element)", "0Q00111100LMmmmm0001H0nnnnnddddd") +INST(FMLA_elt_4, "FMLA (by element)", "0Q0011111zLMmmmm0001H0nnnnnddddd") +INST(FMLS_elt_3, "FMLS (by element)", "0Q00111100LMmmmm0101H0nnnnnddddd") +INST(FMLS_elt_4, "FMLS (by element)", "0Q0011111zLMmmmm0101H0nnnnnddddd") +//INST(FMUL_elt_3, "FMUL (by element)", "0Q00111100LMmmmm1001H0nnnnnddddd") +INST(FMUL_elt_4, "FMUL (by element)", "0Q0011111zLMmmmm1001H0nnnnnddddd") +//INST(FMLAL_elt_1, "FMLAL, FMLAL2 (by element)", "0Q0011111zLMmmmm0000H0nnnnnddddd") +//INST(FMLAL_elt_2, "FMLAL, FMLAL2 (by element)", "0Q1011111zLMmmmm1000H0nnnnnddddd") +//INST(FMLSL_elt_1, "FMLSL, FMLSL2 (by element)", "0Q0011111zLMmmmm0100H0nnnnnddddd") +//INST(FMLSL_elt_2, "FMLSL, FMLSL2 (by element)", "0Q1011111zLMmmmm1100H0nnnnnddddd") +INST(MLA_elt, "MLA (by element)", "0Q101111zzLMmmmm0000H0nnnnnddddd") +INST(UMLAL_elt, "UMLAL, UMLAL2 (by element)", "0Q101111zzLMmmmm0010H0nnnnnddddd") +INST(MLS_elt, "MLS (by element)", "0Q101111zzLMmmmm0100H0nnnnnddddd") +INST(UMLSL_elt, 
"UMLSL, UMLSL2 (by element)", "0Q101111zzLMmmmm0110H0nnnnnddddd") +INST(UMULL_elt, "UMULL, UMULL2 (by element)", "0Q101111zzLMmmmm1010H0nnnnnddddd") +//INST(SQRDMLAH_elt_2, "SQRDMLAH (by element)", "0Q101111zzLMmmmm1101H0nnnnnddddd") +INST(UDOT_elt, "UDOT (by element)", "0Q101111zzLMmmmm1110H0nnnnnddddd") +//INST(SQRDMLSH_elt_2, "SQRDMLSH (by element)", "0Q101111zzLMmmmm1111H0nnnnnddddd") +//INST(FMULX_elt_3, "FMULX (by element)", "0Q10111100LMmmmm1001H0nnnnnddddd") +INST(FMULX_elt_4, "FMULX (by element)", "0Q1011111zLMmmmm1001H0nnnnnddddd") +INST(FCMLA_elt, "FCMLA (by element)", "0Q101111zzLMmmmm0rr1H0nnnnnddddd") + +// Data Processing - FP and SIMD - Cryptographic three register +INST(SM3TT1A, "SM3TT1A", "11001110010mmmmm10ii00nnnnnddddd") +INST(SM3TT1B, "SM3TT1B", "11001110010mmmmm10ii01nnnnnddddd") +INST(SM3TT2A, "SM3TT2A", "11001110010mmmmm10ii10nnnnnddddd") +INST(SM3TT2B, "SM3TT2B", "11001110010mmmmm10ii11nnnnnddddd") + +// Data Processing - FP and SIMD - SHA512 three register +INST(SHA512H, "SHA512H", "11001110011mmmmm100000nnnnnddddd") +INST(SHA512H2, "SHA512H2", "11001110011mmmmm100001nnnnnddddd") +INST(SHA512SU1, "SHA512SU1", "11001110011mmmmm100010nnnnnddddd") +INST(RAX1, "RAX1", "11001110011mmmmm100011nnnnnddddd") +INST(SM3PARTW1, "SM3PARTW1", "11001110011mmmmm110000nnnnnddddd") +INST(SM3PARTW2, "SM3PARTW2", "11001110011mmmmm110001nnnnnddddd") +INST(SM4EKEY, "SM4EKEY", "11001110011mmmmm110010nnnnnddddd") +INST(XAR, "XAR", "11001110100mmmmmiiiiiinnnnnddddd") + +// Data Processing - FP and SIMD - Cryptographic four register +INST(EOR3, "EOR3", "11001110000mmmmm0aaaaannnnnddddd") +INST(BCAX, "BCAX", "11001110001mmmmm0aaaaannnnnddddd") +INST(SM3SS1, "SM3SS1", "11001110010mmmmm0aaaaannnnnddddd") + +// Data Processing - FP and SIMD - SHA512 two register +INST(SHA512SU0, "SHA512SU0", "1100111011000000100000nnnnnddddd") +INST(SM4E, "SM4E", "1100111011000000100001nnnnnddddd") + +// Data Processing - FP and SIMD - Conversion between floating point and fixed point 
+INST(SCVTF_float_fix, "SCVTF (scalar, fixed-point)", "z0011110yy000010ppppppnnnnnddddd") +INST(UCVTF_float_fix, "UCVTF (scalar, fixed-point)", "z0011110yy000011ppppppnnnnnddddd") +INST(FCVTZS_float_fix, "FCVTZS (scalar, fixed-point)", "z0011110yy011000ppppppnnnnnddddd") +INST(FCVTZU_float_fix, "FCVTZU (scalar, fixed-point)", "z0011110yy011001ppppppnnnnnddddd") + +// Data Processing - FP and SIMD - Conversion between floating point and integer +INST(FCVTNS_float, "FCVTNS (scalar)", "z0011110yy100000000000nnnnnddddd") +INST(FCVTNU_float, "FCVTNU (scalar)", "z0011110yy100001000000nnnnnddddd") +INST(SCVTF_float_int, "SCVTF (scalar, integer)", "z0011110yy100010000000nnnnnddddd") +INST(UCVTF_float_int, "UCVTF (scalar, integer)", "z0011110yy100011000000nnnnnddddd") +INST(FCVTAS_float, "FCVTAS (scalar)", "z0011110yy100100000000nnnnnddddd") +INST(FCVTAU_float, "FCVTAU (scalar)", "z0011110yy100101000000nnnnnddddd") +INST(FMOV_float_gen, "FMOV (general)", "z0011110yy10r11o000000nnnnnddddd") +INST(FCVTPS_float, "FCVTPS (scalar)", "z0011110yy101000000000nnnnnddddd") +INST(FCVTPU_float, "FCVTPU (scalar)", "z0011110yy101001000000nnnnnddddd") +INST(FCVTMS_float, "FCVTMS (scalar)", "z0011110yy110000000000nnnnnddddd") +INST(FCVTMU_float, "FCVTMU (scalar)", "z0011110yy110001000000nnnnnddddd") +INST(FCVTZS_float_int, "FCVTZS (scalar, integer)", "z0011110yy111000000000nnnnnddddd") +INST(FCVTZU_float_int, "FCVTZU (scalar, integer)", "z0011110yy111001000000nnnnnddddd") +//INST(FJCVTZS, "FJCVTZS", "0001111001111110000000nnnnnddddd") + +// Data Processing - FP and SIMD - Floating point data processing +INST(FMOV_float, "FMOV (register)", "00011110yy100000010000nnnnnddddd") +INST(FABS_float, "FABS (scalar)", "00011110yy100000110000nnnnnddddd") +INST(FNEG_float, "FNEG (scalar)", "00011110yy100001010000nnnnnddddd") +INST(FSQRT_float, "FSQRT (scalar)", "00011110yy100001110000nnnnnddddd") +INST(FCVT_float, "FCVT", "00011110yy10001oo10000nnnnnddddd") +INST(FRINTN_float, "FRINTN (scalar)", 
"00011110yy100100010000nnnnnddddd") +INST(FRINTP_float, "FRINTP (scalar)", "00011110yy100100110000nnnnnddddd") +INST(FRINTM_float, "FRINTM (scalar)", "00011110yy100101010000nnnnnddddd") +INST(FRINTZ_float, "FRINTZ (scalar)", "00011110yy100101110000nnnnnddddd") +INST(FRINTA_float, "FRINTA (scalar)", "00011110yy100110010000nnnnnddddd") +INST(FRINTX_float, "FRINTX (scalar)", "00011110yy100111010000nnnnnddddd") +INST(FRINTI_float, "FRINTI (scalar)", "00011110yy100111110000nnnnnddddd") +//INST(FRINT32X_float, "FRINT32X (scalar)", "00011110yy101000110000nnnnnddddd") // ARMv8.5 +//INST(FRINT64X_float, "FRINT64X (scalar)", "00011110yy101001110000nnnnnddddd") // ARMv8.5 +//INST(FRINT32Z_float, "FRINT32Z (scalar)", "00011110yy101000010000nnnnnddddd") // ARMv8.5 +//INST(FRINT64Z_float, "FRINT64Z (scalar)", "00011110yy101001010000nnnnnddddd") // ARMv8.5 + +// Data Processing - FP and SIMD - Floating point compare +INST(FCMP_float, "FCMP", "00011110yy1mmmmm001000nnnnn0o000") +INST(FCMPE_float, "FCMPE", "00011110yy1mmmmm001000nnnnn1o000") + +// Data Processing - FP and SIMD - Floating point immediate +INST(FMOV_float_imm, "FMOV (scalar, immediate)", "00011110yy1iiiiiiii10000000ddddd") + +// Data Processing - FP and SIMD - Floating point conditional compare +INST(FCCMP_float, "FCCMP", "00011110yy1mmmmmcccc01nnnnn0ffff") +INST(FCCMPE_float, "FCCMPE", "00011110yy1mmmmmcccc01nnnnn1ffff") + +// Data Processing - FP and SIMD - Floating point data processing two register +INST(FMUL_float, "FMUL (scalar)", "00011110yy1mmmmm000010nnnnnddddd") +INST(FDIV_float, "FDIV (scalar)", "00011110yy1mmmmm000110nnnnnddddd") +INST(FADD_float, "FADD (scalar)", "00011110yy1mmmmm001010nnnnnddddd") +INST(FSUB_float, "FSUB (scalar)", "00011110yy1mmmmm001110nnnnnddddd") +INST(FMAX_float, "FMAX (scalar)", "00011110yy1mmmmm010010nnnnnddddd") +INST(FMIN_float, "FMIN (scalar)", "00011110yy1mmmmm010110nnnnnddddd") +INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd") +INST(FMINNM_float, 
"FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd") +INST(FNMUL_float, "FNMUL (scalar)", "00011110yy1mmmmm100010nnnnnddddd") + +// Data Processing - FP and SIMD - Floating point conditional select +INST(FCSEL_float, "FCSEL", "00011110yy1mmmmmcccc11nnnnnddddd") + +// Data Processing - FP and SIMD - Floating point data processing three register +INST(FMADD_float, "FMADD", "00011111yy0mmmmm0aaaaannnnnddddd") +INST(FMSUB_float, "FMSUB", "00011111yy0mmmmm1aaaaannnnnddddd") +INST(FNMADD_float, "FNMADD", "00011111yy1mmmmm0aaaaannnnnddddd") +INST(FNMSUB_float, "FNMSUB", "00011111yy1mmmmm1aaaaannnnnddddd") + +// BFloat16 +//INST(BFCVT, "BFCVT", "0001111001100011010000nnnnnddddd") // v8.6 +//INST(BFCVTN, "BFCVTN{2}", "0Q00111010100001011010nnnnnddddd") // v8.6 +//INST(BFDOT_element, "BFDOT (by element)", "0Q00111101LMmmmm1111H0nnnnnddddd") // v8.6 +//INST(BFDOT_vec, "BFDOT (vector)", "0Q101110010mmmmm111111nnnnnddddd") // v8.6 +//INST(BFMLALX_element, "BFMLALX (by element)", "0Q00111111LMmmmm1111H0nnnnnddddd") // v8.6 +//INST(BFMLALX_vector, "BFMLALX (vector)", "0Q101110110mmmmm111111nnnnnddddd") // v8.6 +//INST(BFMMLA, "BFMMLA", "01101110010mmmmm111011nnnnnddddd") // v8.6 +#undef INST + }; + // If a matcher has more bits in its mask it is more specific, so it should come first. + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { + // If a matcher has more bits in its mask it is more specific, so it should come first. + return mcl::bit::count_ones(a.second) > mcl::bit::count_ones(b.second); + }); + // Exceptions to the above rule of thumb. + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { + return std::set{ + "MOVI, MVNI, ORR, BIC (vector, immediate)", + "FMOV (vector, immediate)", + "Unallocated SIMD modified immediate", + }.count(e.first) > 0; + }); + for (auto const& e : list) + printf("%s\n", e.inst_final); + } + return 0; +} +