early-access version 2829
This commit is contained in:
parent
aae4e12805
commit
3df4ab9726
71 changed files with 17304 additions and 384 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 2828.
|
This is the source code for early-access 2829.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
8
externals/dynarmic/CMakeLists.txt
vendored
8
externals/dynarmic/CMakeLists.txt
vendored
|
@ -1,5 +1,5 @@
|
||||||
cmake_minimum_required(VERSION 3.8)
|
cmake_minimum_required(VERSION 3.8)
|
||||||
project(dynarmic LANGUAGES C CXX ASM VERSION 6.0.1)
|
project(dynarmic LANGUAGES C CXX ASM VERSION 6.1.1)
|
||||||
|
|
||||||
# Determine if we're built as a subproject (using add_subdirectory)
|
# Determine if we're built as a subproject (using add_subdirectory)
|
||||||
# or if this is the master project.
|
# or if this is the master project.
|
||||||
|
@ -131,12 +131,6 @@ if (DYNARMIC_NO_BUNDLED_ROBIN_MAP AND NOT TARGET tsl::robin_map)
|
||||||
find_package(tsl-robin-map REQUIRED)
|
find_package(tsl-robin-map REQUIRED)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (DYNARMIC_NO_BUNDLED_VIXL AND ARCHITECTURE STREQUAL "arm64")
|
|
||||||
find_package(PkgConfig REQUIRED)
|
|
||||||
pkg_check_modules(vixl REQUIRED IMPORTED_TARGET vixl)
|
|
||||||
add_library(vixl ALIAS PkgConfig::vixl)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (DYNARMIC_NO_BUNDLED_XBYAK AND NOT TARGET xbyak)
|
if (DYNARMIC_NO_BUNDLED_XBYAK AND NOT TARGET xbyak)
|
||||||
if (ARCHITECTURE STREQUAL "x86" OR ARCHITECTURE STREQUAL "x86_64")
|
if (ARCHITECTURE STREQUAL "x86" OR ARCHITECTURE STREQUAL "x86_64")
|
||||||
find_package(xbyak REQUIRED)
|
find_package(xbyak REQUIRED)
|
||||||
|
|
14
externals/dynarmic/externals/CMakeLists.txt
vendored
14
externals/dynarmic/externals/CMakeLists.txt
vendored
|
@ -28,18 +28,20 @@ if (NOT TARGET merry::mcl)
|
||||||
add_subdirectory(mcl)
|
add_subdirectory(mcl)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# oaknut
|
||||||
|
|
||||||
|
if (NOT TARGET merry::oaknut)
|
||||||
|
if (ARCHITECTURE STREQUAL "arm64")
|
||||||
|
add_subdirectory(oaknut)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
# robin-map
|
# robin-map
|
||||||
|
|
||||||
if (NOT TARGET tsl::robin_map)
|
if (NOT TARGET tsl::robin_map)
|
||||||
add_subdirectory(robin-map)
|
add_subdirectory(robin-map)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# vixl
|
|
||||||
|
|
||||||
if (ARCHITECTURE STREQUAL "arm64" AND NOT TARGET vixl)
|
|
||||||
add_subdirectory(vixl EXCLUDE_FROM_ALL)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# xbyak
|
# xbyak
|
||||||
|
|
||||||
if (NOT TARGET xbyak)
|
if (NOT TARGET xbyak)
|
||||||
|
|
6
externals/dynarmic/externals/README.md
vendored
6
externals/dynarmic/externals/README.md
vendored
|
@ -5,8 +5,8 @@ This repository uses subtrees to manage some of its externals.
|
||||||
```
|
```
|
||||||
git remote add externals-fmt https://github.com/fmtlib/fmt.git --no-tags
|
git remote add externals-fmt https://github.com/fmtlib/fmt.git --no-tags
|
||||||
git remote add externals-mcl https://github.com/merryhime/mcl.git --no-tags
|
git remote add externals-mcl https://github.com/merryhime/mcl.git --no-tags
|
||||||
|
git remote add externals-oaknut https://github.com/merryhime/oaknut.git --no-tags
|
||||||
git remote add externals-robin-map https://github.com/Tessil/robin-map.git --no-tags
|
git remote add externals-robin-map https://github.com/Tessil/robin-map.git --no-tags
|
||||||
git remote add externals-vixl https://git.linaro.org/arm/vixl.git --no-tags
|
|
||||||
git remote add externals-xbyak https://github.com/herumi/xbyak.git --no-tags
|
git remote add externals-xbyak https://github.com/herumi/xbyak.git --no-tags
|
||||||
git remote add externals-zycore https://github.com/zyantific/zycore-c.git --no-tags
|
git remote add externals-zycore https://github.com/zyantific/zycore-c.git --no-tags
|
||||||
git remote add externals-zydis https://github.com/zyantific/zydis.git --no-tags
|
git remote add externals-zydis https://github.com/zyantific/zydis.git --no-tags
|
||||||
|
@ -19,15 +19,15 @@ Change `<ref>` to refer to the appropriate git reference.
|
||||||
```
|
```
|
||||||
git fetch externals-fmt
|
git fetch externals-fmt
|
||||||
git fetch externals-mcl
|
git fetch externals-mcl
|
||||||
|
git fetch externals-oaknut
|
||||||
git fetch externals-robin-map
|
git fetch externals-robin-map
|
||||||
git fetch externals-vixl
|
|
||||||
git fetch externals-xbyak
|
git fetch externals-xbyak
|
||||||
git fetch externals-zycore
|
git fetch externals-zycore
|
||||||
git fetch externals-zydis
|
git fetch externals-zydis
|
||||||
git subtree pull --squash --prefix=externals/fmt externals-fmt <ref>
|
git subtree pull --squash --prefix=externals/fmt externals-fmt <ref>
|
||||||
git subtree pull --squash --prefix=externals/mcl externals-mcl <ref>
|
git subtree pull --squash --prefix=externals/mcl externals-mcl <ref>
|
||||||
|
git subtree pull --squash --prefix=externals/oaknut externals-oaknut <ref>
|
||||||
git subtree pull --squash --prefix=externals/robin-map externals-robin-map <ref>
|
git subtree pull --squash --prefix=externals/robin-map externals-robin-map <ref>
|
||||||
git subtree pull --squash --prefix=externals/vixl/vixl externals-vixl <ref>
|
|
||||||
git subtree pull --squash --prefix=externals/xbyak externals-xbyak <ref>
|
git subtree pull --squash --prefix=externals/xbyak externals-xbyak <ref>
|
||||||
git subtree pull --squash --prefix=externals/zycore externals-zycore <ref>
|
git subtree pull --squash --prefix=externals/zycore externals-zycore <ref>
|
||||||
git subtree pull --squash --prefix=externals/zydis externals-zydis <ref>
|
git subtree pull --squash --prefix=externals/zydis externals-zydis <ref>
|
||||||
|
|
|
@ -34,7 +34,7 @@ BraceWrapping:
|
||||||
AfterClass: false
|
AfterClass: false
|
||||||
AfterControlStatement: Never
|
AfterControlStatement: Never
|
||||||
AfterEnum: false
|
AfterEnum: false
|
||||||
AfterFunction: false
|
AfterFunction: true
|
||||||
AfterNamespace: false
|
AfterNamespace: false
|
||||||
AfterObjCDeclaration: false
|
AfterObjCDeclaration: false
|
||||||
AfterStruct: false
|
AfterStruct: false
|
||||||
|
@ -62,7 +62,7 @@ ColumnLimit: 0
|
||||||
CommentPragmas: '^ IWYU pragma:'
|
CommentPragmas: '^ IWYU pragma:'
|
||||||
CompactNamespaces: false
|
CompactNamespaces: false
|
||||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||||
ConstructorInitializerIndentWidth: 8
|
ConstructorInitializerIndentWidth: 4
|
||||||
ContinuationIndentWidth: 4
|
ContinuationIndentWidth: 4
|
||||||
Cpp11BracedListStyle: true
|
Cpp11BracedListStyle: true
|
||||||
DeriveLineEnding: true
|
DeriveLineEnding: true
|
||||||
|
|
19
externals/dynarmic/externals/mcl/CMakeLists.txt
vendored
19
externals/dynarmic/externals/mcl/CMakeLists.txt
vendored
|
@ -1,10 +1,18 @@
|
||||||
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
|
|
||||||
project(mcl LANGUAGES CXX VERSION 0.1.8)
|
project(mcl LANGUAGES CXX VERSION 0.1.11)
|
||||||
|
|
||||||
|
# Determine if we're built as a subproject (using add_subdirectory)
|
||||||
|
# or if this is the master project.
|
||||||
|
set(MASTER_PROJECT OFF)
|
||||||
|
if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
|
||||||
|
set(MASTER_PROJECT ON)
|
||||||
|
endif()
|
||||||
|
|
||||||
# Project options
|
# Project options
|
||||||
option(MCL_WARNINGS_AS_ERRORS "Warnings as errors" ON)
|
option(MCL_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT})
|
||||||
|
option(MCL_INSTALL "Enable installation" ${MASTER_PROJECT})
|
||||||
|
|
||||||
# Default to a Release build
|
# Default to a Release build
|
||||||
if (NOT CMAKE_BUILD_TYPE)
|
if (NOT CMAKE_BUILD_TYPE)
|
||||||
|
@ -85,7 +93,7 @@ endif()
|
||||||
# Dependencies
|
# Dependencies
|
||||||
|
|
||||||
if (NOT TARGET Catch2::Catch2)
|
if (NOT TARGET Catch2::Catch2)
|
||||||
find_package(Catch2 QUIET)
|
find_package(Catch2 3 QUIET)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT TARGET fmt::fmt)
|
if (NOT TARGET fmt::fmt)
|
||||||
|
@ -95,12 +103,12 @@ endif()
|
||||||
# Project files
|
# Project files
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
if (TARGET Catch2::Catch2)
|
if (TARGET Catch2::Catch2 AND MASTER_PROJECT)
|
||||||
add_subdirectory(tests)
|
add_subdirectory(tests)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Install instructions
|
# Install instructions
|
||||||
|
if (MCL_INSTALL)
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
include(CMakePackageConfigHelpers)
|
include(CMakePackageConfigHelpers)
|
||||||
|
|
||||||
|
@ -124,3 +132,4 @@ install(FILES
|
||||||
)
|
)
|
||||||
|
|
||||||
install(DIRECTORY include/ TYPE INCLUDE FILES_MATCHING PATTERN "*.hpp")
|
install(DIRECTORY include/ TYPE INCLUDE FILES_MATCHING PATTERN "*.hpp")
|
||||||
|
endif()
|
||||||
|
|
|
@ -13,11 +13,12 @@
|
||||||
|
|
||||||
namespace mcl::detail {
|
namespace mcl::detail {
|
||||||
|
|
||||||
[[noreturn]] void assert_terminate_impl(fmt::string_view msg, fmt::format_args args);
|
[[noreturn]] void assert_terminate_impl(const char* expr_str, fmt::string_view msg, fmt::format_args args);
|
||||||
|
|
||||||
template<typename... Ts>
|
template<typename... Ts>
|
||||||
[[noreturn]] void assert_terminate(fmt::string_view msg, Ts... args) {
|
[[noreturn]] void assert_terminate(const char* expr_str, fmt::string_view msg, Ts... args)
|
||||||
assert_terminate_impl(msg, fmt::make_format_args(args...));
|
{
|
||||||
|
assert_terminate_impl(expr_str, msg, fmt::make_format_args(args...));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace mcl::detail
|
} // namespace mcl::detail
|
||||||
|
@ -32,7 +33,7 @@ template<typename... Ts>
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
if (!(expr)) [[unlikely]] { \
|
if (!(expr)) [[unlikely]] { \
|
||||||
::mcl::detail::assert_terminate(#expr); \
|
::mcl::detail::assert_terminate(#expr, "(none)"); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
}()
|
}()
|
||||||
|
@ -45,12 +46,12 @@ template<typename... Ts>
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
if (!(expr)) [[unlikely]] { \
|
if (!(expr)) [[unlikely]] { \
|
||||||
::mcl::detail::assert_terminate(#expr "\nMessage: " __VA_ARGS__); \
|
::mcl::detail::assert_terminate(#expr, __VA_ARGS__); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
}()
|
}()
|
||||||
|
|
||||||
#define ASSERT_FALSE(...) ::mcl::detail::assert_terminate("false\nMessage: " __VA_ARGS__)
|
#define ASSERT_FALSE(...) ::mcl::detail::assert_terminate("false", __VA_ARGS__)
|
||||||
|
|
||||||
#if defined(NDEBUG) || defined(MCL_IGNORE_ASSERTS)
|
#if defined(NDEBUG) || defined(MCL_IGNORE_ASSERTS)
|
||||||
# define DEBUG_ASSERT(expr) ASSUME(expr)
|
# define DEBUG_ASSERT(expr) ASSUME(expr)
|
||||||
|
|
|
@ -13,12 +13,14 @@
|
||||||
namespace mcl::bit {
|
namespace mcl::bit {
|
||||||
|
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
inline size_t count_ones(T x) {
|
inline size_t count_ones(T x)
|
||||||
|
{
|
||||||
return std::bitset<bitsizeof<T>>(x).count();
|
return std::bitset<bitsizeof<T>>(x).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr size_t count_leading_zeros(T x) {
|
constexpr size_t count_leading_zeros(T x)
|
||||||
|
{
|
||||||
size_t result = bitsizeof<T>;
|
size_t result = bitsizeof<T>;
|
||||||
while (x != 0) {
|
while (x != 0) {
|
||||||
x >>= 1;
|
x >>= 1;
|
||||||
|
@ -28,7 +30,8 @@ constexpr size_t count_leading_zeros(T x) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr int highest_set_bit(T x) {
|
constexpr int highest_set_bit(T x)
|
||||||
|
{
|
||||||
int result = -1;
|
int result = -1;
|
||||||
while (x != 0) {
|
while (x != 0) {
|
||||||
x >>= 1;
|
x >>= 1;
|
||||||
|
@ -38,7 +41,8 @@ constexpr int highest_set_bit(T x) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr size_t lowest_set_bit(T x) {
|
constexpr size_t lowest_set_bit(T x)
|
||||||
|
{
|
||||||
if (x == 0) {
|
if (x == 0) {
|
||||||
return bitsizeof<T>;
|
return bitsizeof<T>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,7 +13,8 @@ namespace mcl::bit {
|
||||||
|
|
||||||
/// Create a mask with `count` number of one bits.
|
/// Create a mask with `count` number of one bits.
|
||||||
template<size_t count, BitIntegral T>
|
template<size_t count, BitIntegral T>
|
||||||
constexpr T ones() {
|
constexpr T ones()
|
||||||
|
{
|
||||||
static_assert(count <= bitsizeof<T>, "count larger than bitsize of T");
|
static_assert(count <= bitsizeof<T>, "count larger than bitsize of T");
|
||||||
|
|
||||||
if constexpr (count == 0) {
|
if constexpr (count == 0) {
|
||||||
|
@ -25,7 +26,8 @@ constexpr T ones() {
|
||||||
|
|
||||||
/// Create a mask with `count` number of one bits.
|
/// Create a mask with `count` number of one bits.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T ones(size_t count) {
|
constexpr T ones(size_t count)
|
||||||
|
{
|
||||||
ASSERT_MSG(count <= bitsizeof<T>, "count larger than bitsize of T");
|
ASSERT_MSG(count <= bitsizeof<T>, "count larger than bitsize of T");
|
||||||
|
|
||||||
if (count == 0) {
|
if (count == 0) {
|
||||||
|
@ -36,7 +38,8 @@ constexpr T ones(size_t count) {
|
||||||
|
|
||||||
/// Create a mask of type T for bits [begin_bit, end_bit] inclusive.
|
/// Create a mask of type T for bits [begin_bit, end_bit] inclusive.
|
||||||
template<size_t begin_bit, size_t end_bit, BitIntegral T>
|
template<size_t begin_bit, size_t end_bit, BitIntegral T>
|
||||||
constexpr T mask() {
|
constexpr T mask()
|
||||||
|
{
|
||||||
static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
|
static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
|
||||||
static_assert(begin_bit < bitsizeof<T>, "begin_bit must be smaller than size of T");
|
static_assert(begin_bit < bitsizeof<T>, "begin_bit must be smaller than size of T");
|
||||||
static_assert(end_bit < bitsizeof<T>, "end_bit must be smaller than size of T");
|
static_assert(end_bit < bitsizeof<T>, "end_bit must be smaller than size of T");
|
||||||
|
@ -46,7 +49,8 @@ constexpr T mask() {
|
||||||
|
|
||||||
/// Create a mask of type T for bits [begin_bit, end_bit] inclusive.
|
/// Create a mask of type T for bits [begin_bit, end_bit] inclusive.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T mask(size_t begin_bit, size_t end_bit) {
|
constexpr T mask(size_t begin_bit, size_t end_bit)
|
||||||
|
{
|
||||||
ASSERT_MSG(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
|
ASSERT_MSG(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
|
||||||
ASSERT_MSG(begin_bit < bitsizeof<T>, "begin_bit must be smaller than size of T");
|
ASSERT_MSG(begin_bit < bitsizeof<T>, "begin_bit must be smaller than size of T");
|
||||||
ASSERT_MSG(end_bit < bitsizeof<T>, "end_bit must be smaller than size of T");
|
ASSERT_MSG(end_bit < bitsizeof<T>, "end_bit must be smaller than size of T");
|
||||||
|
@ -56,91 +60,104 @@ constexpr T mask(size_t begin_bit, size_t end_bit) {
|
||||||
|
|
||||||
/// Extract bits [begin_bit, end_bit] inclusive from value of type T.
|
/// Extract bits [begin_bit, end_bit] inclusive from value of type T.
|
||||||
template<size_t begin_bit, size_t end_bit, BitIntegral T>
|
template<size_t begin_bit, size_t end_bit, BitIntegral T>
|
||||||
constexpr T get_bits(T value) {
|
constexpr T get_bits(T value)
|
||||||
|
{
|
||||||
constexpr T m = mask<begin_bit, end_bit, T>();
|
constexpr T m = mask<begin_bit, end_bit, T>();
|
||||||
return (value & m) >> begin_bit;
|
return (value & m) >> begin_bit;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract bits [begin_bit, end_bit] inclusive from value of type T.
|
/// Extract bits [begin_bit, end_bit] inclusive from value of type T.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T get_bits(size_t begin_bit, size_t end_bit, T value) {
|
constexpr T get_bits(size_t begin_bit, size_t end_bit, T value)
|
||||||
|
{
|
||||||
const T m = mask<T>(begin_bit, end_bit);
|
const T m = mask<T>(begin_bit, end_bit);
|
||||||
return (value & m) >> begin_bit;
|
return (value & m) >> begin_bit;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clears bits [begin_bit, end_bit] inclusive of value of type T.
|
/// Clears bits [begin_bit, end_bit] inclusive of value of type T.
|
||||||
template<size_t begin_bit, size_t end_bit, BitIntegral T>
|
template<size_t begin_bit, size_t end_bit, BitIntegral T>
|
||||||
constexpr T clear_bits(T value) {
|
constexpr T clear_bits(T value)
|
||||||
|
{
|
||||||
constexpr T m = mask<begin_bit, end_bit, T>();
|
constexpr T m = mask<begin_bit, end_bit, T>();
|
||||||
return value & ~m;
|
return value & ~m;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clears bits [begin_bit, end_bit] inclusive of value of type T.
|
/// Clears bits [begin_bit, end_bit] inclusive of value of type T.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T clear_bits(size_t begin_bit, size_t end_bit, T value) {
|
constexpr T clear_bits(size_t begin_bit, size_t end_bit, T value)
|
||||||
|
{
|
||||||
const T m = mask<T>(begin_bit, end_bit);
|
const T m = mask<T>(begin_bit, end_bit);
|
||||||
return value & ~m;
|
return value & ~m;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Modifies bits [begin_bit, end_bit] inclusive of value of type T.
|
/// Modifies bits [begin_bit, end_bit] inclusive of value of type T.
|
||||||
template<size_t begin_bit, size_t end_bit, BitIntegral T>
|
template<size_t begin_bit, size_t end_bit, BitIntegral T>
|
||||||
constexpr T set_bits(T value, T new_bits) {
|
constexpr T set_bits(T value, T new_bits)
|
||||||
|
{
|
||||||
constexpr T m = mask<begin_bit, end_bit, T>();
|
constexpr T m = mask<begin_bit, end_bit, T>();
|
||||||
return (value & ~m) | ((new_bits << begin_bit) & m);
|
return (value & ~m) | ((new_bits << begin_bit) & m);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Modifies bits [begin_bit, end_bit] inclusive of value of type T.
|
/// Modifies bits [begin_bit, end_bit] inclusive of value of type T.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T set_bits(size_t begin_bit, size_t end_bit, T value, T new_bits) {
|
constexpr T set_bits(size_t begin_bit, size_t end_bit, T value, T new_bits)
|
||||||
|
{
|
||||||
const T m = mask<T>(begin_bit, end_bit);
|
const T m = mask<T>(begin_bit, end_bit);
|
||||||
return (value & ~m) | ((new_bits << begin_bit) & m);
|
return (value & ~m) | ((new_bits << begin_bit) & m);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract bit at bit_position from value of type T.
|
/// Extract bit at bit_position from value of type T.
|
||||||
template<size_t bit_position, BitIntegral T>
|
template<size_t bit_position, BitIntegral T>
|
||||||
constexpr bool get_bit(T value) {
|
constexpr bool get_bit(T value)
|
||||||
|
{
|
||||||
constexpr T m = mask<bit_position, bit_position, T>();
|
constexpr T m = mask<bit_position, bit_position, T>();
|
||||||
return (value & m) != 0;
|
return (value & m) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract bit at bit_position from value of type T.
|
/// Extract bit at bit_position from value of type T.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr bool get_bit(size_t bit_position, T value) {
|
constexpr bool get_bit(size_t bit_position, T value)
|
||||||
|
{
|
||||||
const T m = mask<T>(bit_position, bit_position);
|
const T m = mask<T>(bit_position, bit_position);
|
||||||
return (value & m) != 0;
|
return (value & m) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clears bit at bit_position of value of type T.
|
/// Clears bit at bit_position of value of type T.
|
||||||
template<size_t bit_position, BitIntegral T>
|
template<size_t bit_position, BitIntegral T>
|
||||||
constexpr T clear_bit(T value) {
|
constexpr T clear_bit(T value)
|
||||||
|
{
|
||||||
constexpr T m = mask<bit_position, bit_position, T>();
|
constexpr T m = mask<bit_position, bit_position, T>();
|
||||||
return value & ~m;
|
return value & ~m;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clears bit at bit_position of value of type T.
|
/// Clears bit at bit_position of value of type T.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T clear_bit(size_t bit_position, T value) {
|
constexpr T clear_bit(size_t bit_position, T value)
|
||||||
|
{
|
||||||
const T m = mask<T>(bit_position, bit_position);
|
const T m = mask<T>(bit_position, bit_position);
|
||||||
return value & ~m;
|
return value & ~m;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Modifies bit at bit_position of value of type T.
|
/// Modifies bit at bit_position of value of type T.
|
||||||
template<size_t bit_position, BitIntegral T>
|
template<size_t bit_position, BitIntegral T>
|
||||||
constexpr T set_bit(T value, bool new_bit) {
|
constexpr T set_bit(T value, bool new_bit)
|
||||||
|
{
|
||||||
constexpr T m = mask<bit_position, bit_position, T>();
|
constexpr T m = mask<bit_position, bit_position, T>();
|
||||||
return (value & ~m) | (new_bit ? m : static_cast<T>(0));
|
return (value & ~m) | (new_bit ? m : static_cast<T>(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Modifies bit at bit_position of value of type T.
|
/// Modifies bit at bit_position of value of type T.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T set_bit(size_t bit_position, T value, bool new_bit) {
|
constexpr T set_bit(size_t bit_position, T value, bool new_bit)
|
||||||
|
{
|
||||||
const T m = mask<T>(bit_position, bit_position);
|
const T m = mask<T>(bit_position, bit_position);
|
||||||
return (value & ~m) | (new_bit ? m : static_cast<T>(0));
|
return (value & ~m) | (new_bit ? m : static_cast<T>(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
|
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
|
||||||
template<size_t bit_count, BitIntegral T>
|
template<size_t bit_count, BitIntegral T>
|
||||||
constexpr T sign_extend(T value) {
|
constexpr T sign_extend(T value)
|
||||||
|
{
|
||||||
static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
|
static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
|
||||||
|
|
||||||
using S = std::make_signed_t<T>;
|
using S = std::make_signed_t<T>;
|
||||||
|
@ -150,7 +167,8 @@ constexpr T sign_extend(T value) {
|
||||||
|
|
||||||
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
|
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T sign_extend(size_t bit_count, T value) {
|
constexpr T sign_extend(size_t bit_count, T value)
|
||||||
|
{
|
||||||
ASSERT_MSG(bit_count != 0, "cannot sign-extend zero-sized value");
|
ASSERT_MSG(bit_count != 0, "cannot sign-extend zero-sized value");
|
||||||
|
|
||||||
using S = std::make_signed_t<T>;
|
using S = std::make_signed_t<T>;
|
||||||
|
@ -160,7 +178,8 @@ constexpr T sign_extend(size_t bit_count, T value) {
|
||||||
|
|
||||||
/// Replicate an element across a value of type T.
|
/// Replicate an element across a value of type T.
|
||||||
template<size_t element_size, BitIntegral T>
|
template<size_t element_size, BitIntegral T>
|
||||||
constexpr T replicate_element(T value) {
|
constexpr T replicate_element(T value)
|
||||||
|
{
|
||||||
static_assert(element_size <= bitsizeof<T>, "element_size is too large");
|
static_assert(element_size <= bitsizeof<T>, "element_size is too large");
|
||||||
static_assert(bitsizeof<T> % element_size == 0, "bitsize of T not divisible by element_size");
|
static_assert(bitsizeof<T> % element_size == 0, "bitsize of T not divisible by element_size");
|
||||||
|
|
||||||
|
@ -173,7 +192,8 @@ constexpr T replicate_element(T value) {
|
||||||
|
|
||||||
/// Replicate an element of type U across a value of type T.
|
/// Replicate an element of type U across a value of type T.
|
||||||
template<BitIntegral U, BitIntegral T>
|
template<BitIntegral U, BitIntegral T>
|
||||||
constexpr T replicate_element(T value) {
|
constexpr T replicate_element(T value)
|
||||||
|
{
|
||||||
static_assert(bitsizeof<U> <= bitsizeof<T>, "element_size is too large");
|
static_assert(bitsizeof<U> <= bitsizeof<T>, "element_size is too large");
|
||||||
|
|
||||||
return replicate_element<bitsizeof<U>, T>(value);
|
return replicate_element<bitsizeof<U>, T>(value);
|
||||||
|
@ -181,7 +201,8 @@ constexpr T replicate_element(T value) {
|
||||||
|
|
||||||
/// Replicate an element across a value of type T.
|
/// Replicate an element across a value of type T.
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T replicate_element(size_t element_size, T value) {
|
constexpr T replicate_element(size_t element_size, T value)
|
||||||
|
{
|
||||||
ASSERT_MSG(element_size <= bitsizeof<T>, "element_size is too large");
|
ASSERT_MSG(element_size <= bitsizeof<T>, "element_size is too large");
|
||||||
ASSERT_MSG(bitsizeof<T> % element_size == 0, "bitsize of T not divisible by element_size");
|
ASSERT_MSG(bitsizeof<T> % element_size == 0, "bitsize of T not divisible by element_size");
|
||||||
|
|
||||||
|
@ -192,7 +213,8 @@ constexpr T replicate_element(size_t element_size, T value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr bool most_significant_bit(T value) {
|
constexpr bool most_significant_bit(T value)
|
||||||
|
{
|
||||||
return get_bit<bitsizeof<T> - 1, T>(value);
|
return get_bit<bitsizeof<T> - 1, T>(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,8 @@
|
||||||
namespace mcl::bit {
|
namespace mcl::bit {
|
||||||
|
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T rotate_right(T x, size_t amount) {
|
constexpr T rotate_right(T x, size_t amount)
|
||||||
|
{
|
||||||
amount %= bitsizeof<T>;
|
amount %= bitsizeof<T>;
|
||||||
if (amount == 0) {
|
if (amount == 0) {
|
||||||
return x;
|
return x;
|
||||||
|
@ -20,7 +21,8 @@ constexpr T rotate_right(T x, size_t amount) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<BitIntegral T>
|
template<BitIntegral T>
|
||||||
constexpr T rotate_left(T x, size_t amount) {
|
constexpr T rotate_left(T x, size_t amount)
|
||||||
|
{
|
||||||
amount %= bitsizeof<T>;
|
amount %= bitsizeof<T>;
|
||||||
if (amount == 0) {
|
if (amount == 0) {
|
||||||
return x;
|
return x;
|
||||||
|
|
|
@ -8,18 +8,21 @@
|
||||||
|
|
||||||
namespace mcl::bit {
|
namespace mcl::bit {
|
||||||
|
|
||||||
constexpr u16 swap_bytes_16(u16 value) {
|
constexpr u16 swap_bytes_16(u16 value)
|
||||||
|
{
|
||||||
return static_cast<u16>(u32{value} >> 8 | u32{value} << 8);
|
return static_cast<u16>(u32{value} >> 8 | u32{value} << 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u32 swap_bytes_32(u32 value) {
|
constexpr u32 swap_bytes_32(u32 value)
|
||||||
|
{
|
||||||
return ((value & 0xff000000u) >> 24)
|
return ((value & 0xff000000u) >> 24)
|
||||||
| ((value & 0x00ff0000u) >> 8)
|
| ((value & 0x00ff0000u) >> 8)
|
||||||
| ((value & 0x0000ff00u) << 8)
|
| ((value & 0x0000ff00u) << 8)
|
||||||
| ((value & 0x000000ffu) << 24);
|
| ((value & 0x000000ffu) << 24);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u64 swap_bytes_64(u64 value) {
|
constexpr u64 swap_bytes_64(u64 value)
|
||||||
|
{
|
||||||
return ((value & 0xff00000000000000ull) >> 56)
|
return ((value & 0xff00000000000000ull) >> 56)
|
||||||
| ((value & 0x00ff000000000000ull) >> 40)
|
| ((value & 0x00ff000000000000ull) >> 40)
|
||||||
| ((value & 0x0000ff0000000000ull) >> 24)
|
| ((value & 0x0000ff0000000000ull) >> 24)
|
||||||
|
@ -30,19 +33,22 @@ constexpr u64 swap_bytes_64(u64 value) {
|
||||||
| ((value & 0x00000000000000ffull) << 56);
|
| ((value & 0x00000000000000ffull) << 56);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u32 swap_halves_32(u32 value) {
|
constexpr u32 swap_halves_32(u32 value)
|
||||||
|
{
|
||||||
return ((value & 0xffff0000u) >> 16)
|
return ((value & 0xffff0000u) >> 16)
|
||||||
| ((value & 0x0000ffffu) << 16);
|
| ((value & 0x0000ffffu) << 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u64 swap_halves_64(u64 value) {
|
constexpr u64 swap_halves_64(u64 value)
|
||||||
|
{
|
||||||
return ((value & 0xffff000000000000ull) >> 48)
|
return ((value & 0xffff000000000000ull) >> 48)
|
||||||
| ((value & 0x0000ffff00000000ull) >> 16)
|
| ((value & 0x0000ffff00000000ull) >> 16)
|
||||||
| ((value & 0x00000000ffff0000ull) << 16)
|
| ((value & 0x00000000ffff0000ull) << 16)
|
||||||
| ((value & 0x000000000000ffffull) << 48);
|
| ((value & 0x000000000000ffffull) << 48);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u64 swap_words_64(u64 value) {
|
constexpr u64 swap_words_64(u64 value)
|
||||||
|
{
|
||||||
return ((value & 0xffffffff00000000ull) >> 32)
|
return ((value & 0xffffffff00000000ull) >> 32)
|
||||||
| ((value & 0x00000000ffffffffull) << 32);
|
| ((value & 0x00000000ffffffffull) << 32);
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,8 @@ namespace mcl {
|
||||||
|
|
||||||
/// Reinterpret objects of one type as another by bit-casting between object representations.
|
/// Reinterpret objects of one type as another by bit-casting between object representations.
|
||||||
template<class Dest, class Source>
|
template<class Dest, class Source>
|
||||||
inline Dest bit_cast(const Source& source) noexcept {
|
inline Dest bit_cast(const Source& source) noexcept
|
||||||
|
{
|
||||||
static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal");
|
static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal");
|
||||||
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
|
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
|
||||||
static_assert(std::is_trivially_copyable_v<Source>, "source type must be trivially copyable");
|
static_assert(std::is_trivially_copyable_v<Source>, "source type must be trivially copyable");
|
||||||
|
@ -24,7 +25,8 @@ inline Dest bit_cast(const Source& source) noexcept {
|
||||||
/// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations.
|
/// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations.
|
||||||
/// Note that here we do not verify if source pointed to by source_ptr has enough bytes to read from.
|
/// Note that here we do not verify if source pointed to by source_ptr has enough bytes to read from.
|
||||||
template<class Dest, class SourcePtr>
|
template<class Dest, class SourcePtr>
|
||||||
inline Dest bit_cast_pointee(const SourcePtr source_ptr) noexcept {
|
inline Dest bit_cast_pointee(const SourcePtr source_ptr) noexcept
|
||||||
|
{
|
||||||
static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer");
|
static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer");
|
||||||
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
|
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
|
||||||
|
|
||||||
|
|
35
externals/dynarmic/externals/mcl/include/mcl/container/detail/meta_byte.hpp
vendored
Executable file
35
externals/dynarmic/externals/mcl/include/mcl/container/detail/meta_byte.hpp
vendored
Executable file
|
@ -0,0 +1,35 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "mcl/bitsizeof.hpp"
|
||||||
|
#include "mcl/stdint.hpp"
|
||||||
|
|
||||||
|
namespace mcl::detail {
|
||||||
|
|
||||||
|
/// if MSB is 0, this is a full slot. remaining 7 bits is a partial hash of the key.
|
||||||
|
/// if MSB is 1, this is a non-full slot.
|
||||||
|
enum class meta_byte : u8 {
|
||||||
|
empty = 0xff,
|
||||||
|
tombstone = 0x80,
|
||||||
|
end_sentinel = 0x88,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Returns true when mb describes a full slot, i.e. when its most significant bit is clear.
inline bool is_full(meta_byte mb)
{
    const u8 raw = static_cast<u8>(mb);
    return !(raw & 0x80);
}
|
||||||
|
|
||||||
|
/// Builds the meta byte of a full slot from the top 7 bits of hash.
/// The result always has its MSB clear because only 7 bits survive the shift.
inline meta_byte meta_byte_from_hash(size_t hash)
{
    constexpr auto shift = bitsizeof<size_t> - 7;
    const size_t top_bits = hash >> shift;
    return static_cast<meta_byte>(top_bits);
}
|
||||||
|
|
||||||
|
/// Selects the starting group for hash by masking it with group_index_mask.
inline size_t group_index_from_hash(size_t hash, size_t group_index_mask)
{
    const size_t group_index = group_index_mask & hash;
    return group_index;
}
|
||||||
|
|
||||||
|
} // namespace mcl::detail
|
263
externals/dynarmic/externals/mcl/include/mcl/container/detail/meta_byte_group.hpp
vendored
Executable file
263
externals/dynarmic/externals/mcl/include/mcl/container/detail/meta_byte_group.hpp
vendored
Executable file
|
@ -0,0 +1,263 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <bit>
|
||||||
|
|
||||||
|
#include "mcl/assert.hpp"
|
||||||
|
#include "mcl/container/detail/meta_byte.hpp"
|
||||||
|
#include "mcl/macro/architecture.hpp"
|
||||||
|
#include "mcl/stdint.hpp"
|
||||||
|
|
||||||
|
#if defined(MCL_ARCHITECTURE_ARM64)
|
||||||
|
# include <arm_neon.h>
|
||||||
|
#elif defined(MCL_ARCHITECTURE_X86_64)
|
||||||
|
# include <emmintrin.h>
|
||||||
|
|
||||||
|
# include "mcl/bit_cast.hpp"
|
||||||
|
#else
|
||||||
|
# include <cstring>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace mcl::detail {
|
||||||
|
|
||||||
|
#if defined(MCL_ARCHITECTURE_ARM64)
|
||||||
|
|
||||||
|
struct meta_byte_group {
|
||||||
|
static constexpr size_t max_group_size{16};
|
||||||
|
|
||||||
|
explicit meta_byte_group(meta_byte* ptr)
|
||||||
|
: data{vld1q_u8(reinterpret_cast<u8*>(ptr))}
|
||||||
|
{}
|
||||||
|
|
||||||
|
explicit meta_byte_group(const std::array<meta_byte, 16>& array)
|
||||||
|
: data{vld1q_u8(reinterpret_cast<const u8*>(array.data()))}
|
||||||
|
{}
|
||||||
|
|
||||||
|
uint64x2_t match(meta_byte cmp) const
|
||||||
|
{
|
||||||
|
return vreinterpretq_u64_u8(vandq_u8(vceqq_u8(data,
|
||||||
|
vdupq_n_u8(static_cast<u8>(cmp))),
|
||||||
|
vdupq_n_u8(0x80)));
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64x2_t match_empty_or_tombstone() const
|
||||||
|
{
|
||||||
|
return vreinterpretq_u64_u8(vandq_u8(data,
|
||||||
|
vdupq_n_u8(0x80)));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_any_empty() const
|
||||||
|
{
|
||||||
|
static_assert(meta_byte::empty == static_cast<meta_byte>(0xff), "empty must be maximal u8 value");
|
||||||
|
return vmaxvq_u8(data) == 0xff;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_all_empty_or_tombstone() const
|
||||||
|
{
|
||||||
|
return vminvq_u8(vandq_u8(data, vdupq_n_u8(0x80))) == 0x80;
|
||||||
|
}
|
||||||
|
|
||||||
|
meta_byte get(size_t index) const
|
||||||
|
{
|
||||||
|
return static_cast<meta_byte>(data[index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set(size_t index, meta_byte value)
|
||||||
|
{
|
||||||
|
data[index] = static_cast<u8>(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8x16_t data;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// ARM64: runs the statements in __VA_ARGS__ once per set match bit of MATCH (a uint64x2_t
/// with 0x80 in each matching byte lane), lowest lane first. Inside the statements,
/// `match_index` is the lane number (0..15) of the current match.
#    define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...)                                                               \
        {                                                                                                            \
            const uint64x2_t match_result{MATCH};                                                                    \
                                                                                                                     \
            for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) {   \
                const size_t match_index{static_cast<size_t>(std::countr_zero(match_result_v) >> 3)};                \
                __VA_ARGS__                                                                                          \
            }                                                                                                        \
                                                                                                                     \
            for (u64 match_result_v{match_result[1]}; match_result_v != 0; match_result_v &= match_result_v - 1) {   \
                const size_t match_index{static_cast<size_t>(8 + (std::countr_zero(match_result_v) >> 3))};          \
                __VA_ARGS__                                                                                          \
            }                                                                                                        \
        }
|
||||||
|
|
||||||
|
/// ARM64: same iteration as MCL_HMAP_MATCH_META_BYTE_GROUP, but the top byte of the upper
/// half is masked off first, so lane 15 is never reported.
#    define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...)                                                                       \
        {                                                                                                                                \
            const uint64x2_t match_result{MATCH};                                                                                        \
                                                                                                                                         \
            for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) {                       \
                const size_t match_index{static_cast<size_t>(std::countr_zero(match_result_v) >> 3)};                                    \
                __VA_ARGS__                                                                                                              \
            }                                                                                                                            \
                                                                                                                                         \
            for (u64 match_result_v{match_result[1] & 0x00ffffffffffffff}; match_result_v != 0; match_result_v &= match_result_v - 1) {  \
                const size_t match_index{static_cast<size_t>(8 + (std::countr_zero(match_result_v) >> 3))};                              \
                __VA_ARGS__                                                                                                              \
            }                                                                                                                            \
        }
|
||||||
|
|
||||||
|
#elif defined(MCL_ARCHITECTURE_X86_64)
|
||||||
|
|
||||||
|
struct meta_byte_group {
|
||||||
|
static constexpr size_t max_group_size{16};
|
||||||
|
|
||||||
|
explicit meta_byte_group(meta_byte* ptr)
|
||||||
|
: data{_mm_load_si128(reinterpret_cast<__m128i const*>(ptr))}
|
||||||
|
{}
|
||||||
|
|
||||||
|
explicit meta_byte_group(const std::array<meta_byte, 16>& array)
|
||||||
|
: data{_mm_loadu_si128(reinterpret_cast<__m128i const*>(array.data()))}
|
||||||
|
{}
|
||||||
|
|
||||||
|
u16 match(meta_byte cmp) const
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(_mm_cmpeq_epi8(data, _mm_set1_epi8(static_cast<u8>(cmp))));
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 match_empty_or_tombstone() const
|
||||||
|
{
|
||||||
|
return _mm_movemask_epi8(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_any_empty() const
|
||||||
|
{
|
||||||
|
return match(meta_byte::empty);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_all_empty_or_tombstone() const
|
||||||
|
{
|
||||||
|
return match_empty_or_tombstone() == 0xffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
meta_byte get(size_t index) const
|
||||||
|
{
|
||||||
|
return mcl::bit_cast<std::array<meta_byte, max_group_size>>(data)[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
void set(size_t index, meta_byte value)
|
||||||
|
{
|
||||||
|
auto array = mcl::bit_cast<std::array<meta_byte, max_group_size>>(data);
|
||||||
|
array[index] = value;
|
||||||
|
data = mcl::bit_cast<__m128i>(array);
|
||||||
|
}
|
||||||
|
|
||||||
|
__m128i data;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// x86-64: runs the statements in __VA_ARGS__ once per set bit of MATCH (a 16-bit lane mask),
/// lowest bit first. Inside the statements, `match_index` is the lane number (0..15).
#    define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...)                                                    \
        {                                                                                                 \
            for (u16 match_result{MATCH}; match_result != 0; match_result &= match_result - 1) {          \
                const size_t match_index{static_cast<size_t>(std::countr_zero(match_result))};            \
                __VA_ARGS__                                                                               \
            }                                                                                             \
        }
|
||||||
|
|
||||||
|
/// x86-64: same iteration as MCL_HMAP_MATCH_META_BYTE_GROUP, but bit 15 of the mask is
/// cleared first, so lane 15 is never reported.
#    define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...)                                                                       \
        {                                                                                                                                \
            for (u16 match_result{static_cast<u16>((MATCH) & (0x7fff))}; match_result != 0; match_result &= match_result - 1) {          \
                const size_t match_index{static_cast<size_t>(std::countr_zero(match_result))};                                           \
                __VA_ARGS__                                                                                                              \
            }                                                                                                                            \
        }
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
struct meta_byte_group {
|
||||||
|
static constexpr size_t max_group_size{16};
|
||||||
|
|
||||||
|
static constexpr u64 msb{0x8080808080808080};
|
||||||
|
static constexpr u64 lsb{0x0101010101010101};
|
||||||
|
static constexpr u64 not_msb{0x7f7f7f7f7f7f7f7f};
|
||||||
|
static constexpr u64 not_lsb{0xfefefefefefefefe};
|
||||||
|
|
||||||
|
explicit meta_byte_group(meta_byte* ptr)
|
||||||
|
{
|
||||||
|
std::memcpy(data.data(), ptr, sizeof(data));
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit meta_byte_group(const std::array<meta_byte, 16>& array)
|
||||||
|
: data{array}
|
||||||
|
{}
|
||||||
|
|
||||||
|
std::array<u64, 2> match(meta_byte cmp) const
|
||||||
|
{
|
||||||
|
DEBUG_ASSERT(is_full(cmp));
|
||||||
|
|
||||||
|
const u64 vcmp{lsb * static_cast<u64>(cmp)};
|
||||||
|
return {(msb - ((data[0] ^ vcmp) & not_msb)) & ~data[0] & msb, (msb - ((data[1] ^ vcmp) & not_msb)) & ~data[1] & msb};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<u64, 2> match_empty_or_tombstone() const
|
||||||
|
{
|
||||||
|
return {data[0] & msb, data[1] & msb};
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_any_empty() const
|
||||||
|
{
|
||||||
|
static_assert((static_cast<u8>(meta_byte::empty) & 0xc0) == 0xc0);
|
||||||
|
static_assert((static_cast<u8>(meta_byte::tombstone) & 0xc0) == 0x80);
|
||||||
|
|
||||||
|
return (data[0] & (data[0] << 1) & msb) || (data[1] & (data[1] << 1) & msb);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_all_empty_or_tombstone() const
|
||||||
|
{
|
||||||
|
return (data[0] & data[1] & msb) == msb;
|
||||||
|
}
|
||||||
|
|
||||||
|
meta_byte get(size_t index) const
|
||||||
|
{
|
||||||
|
return mcl::bit_cast<std::array<meta_byte, max_group_size>>(data)[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
void set(size_t index, meta_byte value)
|
||||||
|
{
|
||||||
|
auto array = mcl::bit_cast<std::array<meta_byte, max_group_size>>(data);
|
||||||
|
array[index] = value;
|
||||||
|
data = mcl::bit_cast<std::array<u64, 2>>(array);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<u64, 2> data;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Fallback: runs the statements in __VA_ARGS__ once per set match bit of MATCH (two u64
/// words with 0x80 in each matching byte), lowest byte first. Inside the statements,
/// `match_index` is the byte position (0..15) of the current match.
#    define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...)                                                               \
        {                                                                                                            \
            const std::array<u64, 2> match_result{MATCH};                                                            \
                                                                                                                     \
            for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) {   \
                const size_t match_index{static_cast<size_t>(std::countr_zero(match_result_v) >> 3)};                \
                __VA_ARGS__                                                                                          \
            }                                                                                                        \
                                                                                                                     \
            for (u64 match_result_v{match_result[1]}; match_result_v != 0; match_result_v &= match_result_v - 1) {   \
                const size_t match_index{static_cast<size_t>(8 + (std::countr_zero(match_result_v) >> 3))};          \
                __VA_ARGS__                                                                                          \
            }                                                                                                        \
        }
|
||||||
|
|
||||||
|
/// Fallback: same iteration as MCL_HMAP_MATCH_META_BYTE_GROUP, but the top byte of the
/// second word is masked off first, so position 15 is never reported.
#    define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...)                                                                       \
        {                                                                                                                                \
            const std::array<u64, 2> match_result{MATCH};                                                                                \
                                                                                                                                         \
            for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) {                       \
                const size_t match_index{static_cast<size_t>(std::countr_zero(match_result_v) >> 3)};                                    \
                __VA_ARGS__                                                                                                              \
            }                                                                                                                            \
                                                                                                                                         \
            for (u64 match_result_v{match_result[1] & 0x00ffffffffffffff}; match_result_v != 0; match_result_v &= match_result_v - 1) {  \
                const size_t match_index{static_cast<size_t>(8 + (std::countr_zero(match_result_v) >> 3))};                              \
                __VA_ARGS__                                                                                                              \
            }                                                                                                                            \
        }
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace mcl::detail
|
16
externals/dynarmic/externals/mcl/include/mcl/container/detail/slot_union.hpp
vendored
Executable file
16
externals/dynarmic/externals/mcl/include/mcl/container/detail/slot_union.hpp
vendored
Executable file
|
@ -0,0 +1,16 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace mcl::detail {
|
||||||
|
|
||||||
|
/// Storage for one ValueType whose lifetime is managed by the owner.
/// Both the constructor and the destructor are deliberately empty: they neither
/// construct nor destroy `value`.
template<typename ValueType>
union slot_union {
    ValueType value;

    slot_union() {}
    ~slot_union() {}
};
|
||||||
|
|
||||||
|
} // namespace mcl::detail
|
532
externals/dynarmic/externals/mcl/include/mcl/container/hmap.hpp
vendored
Executable file
532
externals/dynarmic/externals/mcl/include/mcl/container/hmap.hpp
vendored
Executable file
|
@ -0,0 +1,532 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <bit>
#include <cstddef>
#include <cstring>
#include <functional>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>
|
||||||
|
|
||||||
|
#include "mcl/assert.hpp"
|
||||||
|
#include "mcl/container/detail/meta_byte.hpp"
|
||||||
|
#include "mcl/container/detail/meta_byte_group.hpp"
|
||||||
|
#include "mcl/container/detail/slot_union.hpp"
|
||||||
|
#include "mcl/hash/xmrx.hpp"
|
||||||
|
#include "mcl/hint/assume.hpp"
|
||||||
|
#include "mcl/memory/overaligned_unique_ptr.hpp"
|
||||||
|
|
||||||
|
namespace mcl {
|
||||||
|
|
||||||
|
template<typename KeyType, typename MappedType, typename Hash, typename Pred>
|
||||||
|
class hmap;
|
||||||
|
|
||||||
|
template<bool IsConst, typename KeyType, typename MappedType, typename Hash, typename Pred>
|
||||||
|
class hmap_iterator {
|
||||||
|
using base_value_type = std::pair<const KeyType, MappedType>;
|
||||||
|
using slot_type = detail::slot_union<base_value_type>;
|
||||||
|
|
||||||
|
public:
|
||||||
|
using key_type = KeyType;
|
||||||
|
using mapped_type = MappedType;
|
||||||
|
using iterator_category = std::forward_iterator_tag;
|
||||||
|
using difference_type = std::ptrdiff_t;
|
||||||
|
using value_type = std::conditional_t<IsConst, std::add_const_t<base_value_type>, base_value_type>;
|
||||||
|
using pointer = value_type*;
|
||||||
|
using const_pointer = const value_type*;
|
||||||
|
using reference = value_type&;
|
||||||
|
using const_reference = const value_type&;
|
||||||
|
|
||||||
|
hmap_iterator() = default;
|
||||||
|
hmap_iterator(const hmap_iterator& other) = default;
|
||||||
|
hmap_iterator& operator=(const hmap_iterator& other) = default;
|
||||||
|
|
||||||
|
hmap_iterator& operator++()
|
||||||
|
{
|
||||||
|
if (mb_ptr == nullptr)
|
||||||
|
return *this;
|
||||||
|
|
||||||
|
++mb_ptr;
|
||||||
|
++slot_ptr;
|
||||||
|
|
||||||
|
skip_empty_or_tombstone();
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
hmap_iterator operator++(int)
|
||||||
|
{
|
||||||
|
hmap_iterator it(*this);
|
||||||
|
++*this;
|
||||||
|
return it;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const hmap_iterator& other) const
|
||||||
|
{
|
||||||
|
return std::tie(mb_ptr, slot_ptr) == std::tie(other.mb_ptr, other.slot_ptr);
|
||||||
|
}
|
||||||
|
bool operator!=(const hmap_iterator& other) const
|
||||||
|
{
|
||||||
|
return !operator==(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
reference operator*() const
|
||||||
|
{
|
||||||
|
return static_cast<reference>(slot_ptr->value);
|
||||||
|
}
|
||||||
|
pointer operator->() const
|
||||||
|
{
|
||||||
|
return std::addressof(operator*());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
friend class hmap<KeyType, MappedType, Hash, Pred>;
|
||||||
|
|
||||||
|
hmap_iterator(detail::meta_byte* mb_ptr, slot_type* slot_ptr)
|
||||||
|
: mb_ptr{mb_ptr}, slot_ptr{slot_ptr}
|
||||||
|
{
|
||||||
|
ASSUME(mb_ptr != nullptr);
|
||||||
|
ASSUME(slot_ptr != nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void skip_empty_or_tombstone()
|
||||||
|
{
|
||||||
|
if (!mb_ptr)
|
||||||
|
return;
|
||||||
|
|
||||||
|
while (*mb_ptr == detail::meta_byte::empty || *mb_ptr == detail::meta_byte::tombstone) {
|
||||||
|
++mb_ptr;
|
||||||
|
++slot_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*mb_ptr == detail::meta_byte::end_sentinel) {
|
||||||
|
mb_ptr = nullptr;
|
||||||
|
slot_ptr = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
detail::meta_byte* mb_ptr{nullptr};
|
||||||
|
slot_type* slot_ptr{nullptr};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename KeyType, typename MappedType, typename Hash = hash::avalanche_xmrx<KeyType>, typename Pred = std::equal_to<KeyType>>
|
||||||
|
class hmap {
|
||||||
|
public:
|
||||||
|
using key_type = KeyType;
|
||||||
|
using mapped_type = MappedType;
|
||||||
|
using hasher = Hash;
|
||||||
|
using key_equal = Pred;
|
||||||
|
using value_type = std::pair<const key_type, mapped_type>;
|
||||||
|
using reference = value_type&;
|
||||||
|
using const_reference = const value_type&;
|
||||||
|
using pointer = value_type*;
|
||||||
|
using const_pointer = const value_type*;
|
||||||
|
using size_type = std::size_t;
|
||||||
|
using difference_type = std::ptrdiff_t;
|
||||||
|
|
||||||
|
using iterator = hmap_iterator<false, key_type, mapped_type, hasher, key_equal>;
|
||||||
|
using const_iterator = hmap_iterator<true, key_type, mapped_type, hasher, key_equal>;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static constexpr size_t group_size{detail::meta_byte_group::max_group_size};
|
||||||
|
static constexpr size_t average_max_group_load{group_size - 2};
|
||||||
|
|
||||||
|
using slot_type = detail::slot_union<value_type>;
|
||||||
|
using slot_ptr = std::unique_ptr<slot_type[]>;
|
||||||
|
using meta_byte_ptr = overaligned_unique_ptr<group_size, detail::meta_byte[]>;
|
||||||
|
static_assert(!std::is_reference_v<key_type>);
|
||||||
|
static_assert(!std::is_reference_v<mapped_type>);
|
||||||
|
|
||||||
|
public:
|
||||||
|
hmap()
|
||||||
|
{
|
||||||
|
initialize_members(1);
|
||||||
|
}
|
||||||
|
hmap(const hmap& other)
|
||||||
|
{
|
||||||
|
deep_copy(other);
|
||||||
|
}
|
||||||
|
hmap(hmap&& other)
|
||||||
|
: group_index_mask{std::exchange(other.group_index_mask, 0)}
|
||||||
|
, empty_slots{std::exchange(other.empty_slots, 0)}
|
||||||
|
, full_slots{std::exchange(other.full_slots, 0)}
|
||||||
|
, mbs{std::move(other.mbs)}
|
||||||
|
, slots{std::move(other.slots)}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
/// Copy-assigns the contents of other.
/// Self-assignment is a no-op. Elements currently held are destroyed before the storage
/// is replaced, because the slot storage does not run element destructors on its own.
hmap& operator=(const hmap& other)
{
    if (this == &other) {
        return *this;
    }

    // mbs is null after this map has been moved from; there is nothing to destroy then.
    if (mbs) {
        clear();
    }

    deep_copy(other);
    return *this;
}
|
||||||
|
/// Move-assigns by taking over other's storage and counters.
/// Self-assignment is a no-op. Elements currently held are destroyed first, because
/// releasing the slot storage does not run element destructors on its own.
/// other is left with null storage and all counters set to zero.
hmap& operator=(hmap&& other)
{
    if (this == &other) {
        return *this;
    }

    // mbs is null after this map has been moved from; there is nothing to destroy then.
    if (mbs) {
        clear();
    }

    group_index_mask = std::exchange(other.group_index_mask, 0);
    empty_slots = std::exchange(other.empty_slots, 0);
    full_slots = std::exchange(other.full_slots, 0);
    mbs = std::move(other.mbs);
    slots = std::move(other.slots);
    return *this;
}
|
||||||
|
|
||||||
|
/// Destroys all stored elements. A map whose storage was moved away (null mbs) has
/// nothing to destroy.
~hmap()
{
    if (mbs) {
        clear();
    }
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool empty() const noexcept { return full_slots == 0; }
|
||||||
|
size_type size() const noexcept { return full_slots; }
|
||||||
|
size_type max_size() const noexcept { return static_cast<size_type>(std::numeric_limits<difference_type>::max()); }
|
||||||
|
|
||||||
|
iterator begin()
|
||||||
|
{
|
||||||
|
iterator result{iterator_at(0)};
|
||||||
|
result.skip_empty_or_tombstone();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
iterator end()
|
||||||
|
{
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
const_iterator cbegin() const
|
||||||
|
{
|
||||||
|
const_iterator result{const_iterator_at(0)};
|
||||||
|
result.skip_empty_or_tombstone();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
const_iterator cend() const
|
||||||
|
{
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
const_iterator begin() const
|
||||||
|
{
|
||||||
|
return cbegin();
|
||||||
|
}
|
||||||
|
const_iterator end() const
|
||||||
|
{
|
||||||
|
return cend();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename K = key_type, typename... Args>
|
||||||
|
std::pair<iterator, bool> try_emplace(K&& k, Args&&... args)
|
||||||
|
{
|
||||||
|
auto [item_index, item_found] = find_key_or_empty_slot(k);
|
||||||
|
if (!item_found) {
|
||||||
|
new (&slots[item_index].value) value_type(
|
||||||
|
std::piecewise_construct,
|
||||||
|
std::forward_as_tuple(std::forward<K>(k)),
|
||||||
|
std::forward_as_tuple(std::forward<Args>(args)...));
|
||||||
|
}
|
||||||
|
return {iterator_at(item_index), !item_found};
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename K = key_type, typename V = mapped_type>
|
||||||
|
std::pair<iterator, bool> insert_or_assign(K&& k, V&& v)
|
||||||
|
{
|
||||||
|
auto [item_index, item_found] = find_key_or_empty_slot(k);
|
||||||
|
if (item_found) {
|
||||||
|
slots[item_index].value.second = std::forward<V>(v);
|
||||||
|
} else {
|
||||||
|
new (&slots[item_index].value) value_type(
|
||||||
|
std::forward<K>(k),
|
||||||
|
std::forward<V>(v));
|
||||||
|
}
|
||||||
|
return {iterator_at(item_index), !item_found};
|
||||||
|
}
|
||||||
|
|
||||||
|
void erase(const_iterator position)
|
||||||
|
{
|
||||||
|
if (position == cend()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::size_t item_index{static_cast<std::size_t>(std::distance(mbs.get(), position.mb_ptr))};
|
||||||
|
const std::size_t group_index{item_index / group_size};
|
||||||
|
const detail::meta_byte_group g{mbs.get() + group_index * group_size};
|
||||||
|
|
||||||
|
erase_impl(item_index, std::move(g));
|
||||||
|
}
|
||||||
|
void erase(iterator position)
|
||||||
|
{
|
||||||
|
if (position == end()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::size_t item_index{static_cast<std::size_t>(std::distance(mbs.get(), position.mb_ptr))};
|
||||||
|
const std::size_t group_index{item_index / group_size};
|
||||||
|
const detail::meta_byte_group g{mbs.get() + group_index * group_size};
|
||||||
|
|
||||||
|
erase_impl(item_index, std::move(g));
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
size_t erase(const K& key)
|
||||||
|
{
|
||||||
|
const std::size_t hash{hasher{}(key)};
|
||||||
|
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
|
||||||
|
|
||||||
|
size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
detail::meta_byte_group g{mbs.get() + group_index * group_size};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match(mb), {
|
||||||
|
const std::size_t item_index{group_index * group_size + match_index};
|
||||||
|
|
||||||
|
if (key_equal{}(slots[item_index].value.first, key)) [[likely]] {
|
||||||
|
erase_impl(item_index, std::move(g));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g.is_any_empty()) [[likely]] {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename K = key_type>
|
||||||
|
iterator find(const K& key)
|
||||||
|
{
|
||||||
|
const std::size_t hash{hasher{}(key)};
|
||||||
|
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
|
||||||
|
|
||||||
|
size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
detail::meta_byte_group g{mbs.get() + group_index * group_size};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match(mb), {
|
||||||
|
const std::size_t item_index{group_index * group_size + match_index};
|
||||||
|
|
||||||
|
if (key_equal{}(slots[item_index].value.first, key)) [[likely]] {
|
||||||
|
return iterator_at(item_index);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g.is_any_empty()) [[likely]] {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
const_iterator find(const K& key) const
|
||||||
|
{
|
||||||
|
const std::size_t hash{hasher{}(key)};
|
||||||
|
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
|
||||||
|
|
||||||
|
size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
detail::meta_byte_group g{mbs.get() + group_index * group_size};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match(mb), {
|
||||||
|
const std::size_t item_index{group_index * group_size + match_index};
|
||||||
|
|
||||||
|
if (key_equal{}(slots[item_index].value.first, key)) [[likely]] {
|
||||||
|
return const_iterator_at(item_index);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g.is_any_empty()) [[likely]] {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
bool contains(const K& key) const
|
||||||
|
{
|
||||||
|
return find(key) != end();
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
size_t count(const K& key) const
|
||||||
|
{
|
||||||
|
return contains(key) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename K = key_type>
|
||||||
|
mapped_type& operator[](K&& k)
|
||||||
|
{
|
||||||
|
return try_emplace(std::forward<K>(k)).first->second;
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
mapped_type& at(K&& k)
|
||||||
|
{
|
||||||
|
const auto iter{find(k)};
|
||||||
|
if (iter == end()) {
|
||||||
|
throw std::out_of_range("hmap::at: key not found");
|
||||||
|
}
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
const mapped_type& at(K&& k) const
|
||||||
|
{
|
||||||
|
const auto iter{find(k)};
|
||||||
|
if (iter == end()) {
|
||||||
|
throw std::out_of_range("hmap::at: key not found");
|
||||||
|
}
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
for (auto iter{begin()}; iter != end(); ++iter) {
|
||||||
|
iter->~value_type();
|
||||||
|
}
|
||||||
|
|
||||||
|
clear_metadata();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
iterator iterator_at(std::size_t item_index)
|
||||||
|
{
|
||||||
|
return {mbs.get() + item_index, slots.get() + item_index};
|
||||||
|
}
|
||||||
|
const_iterator const_iterator_at(std::size_t item_index) const
|
||||||
|
{
|
||||||
|
return {mbs.get() + item_index, slots.get() + item_index};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<std::size_t, bool> find_key_or_empty_slot(const key_type& key)
|
||||||
|
{
|
||||||
|
const std::size_t hash{hasher{}(key)};
|
||||||
|
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
|
||||||
|
|
||||||
|
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
detail::meta_byte_group g{mbs.get() + group_index * group_size};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match(mb), {
|
||||||
|
const std::size_t item_index{group_index * group_size + match_index};
|
||||||
|
|
||||||
|
if (key_equal{}(slots[item_index].value.first, key)) [[likely]] {
|
||||||
|
return {item_index, true};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g.is_any_empty()) [[likely]] {
|
||||||
|
return {find_empty_slot_to_insert(hash), false};
|
||||||
|
}
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t find_empty_slot_to_insert(const std::size_t hash)
|
||||||
|
{
|
||||||
|
if (empty_slots == 0) [[unlikely]] {
|
||||||
|
grow_and_rehash();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
detail::meta_byte_group g{mbs.get() + group_index * group_size};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match_empty_or_tombstone(), {
|
||||||
|
const std::size_t item_index{group_index * group_size + match_index};
|
||||||
|
|
||||||
|
if (mbs[item_index] == detail::meta_byte::empty) [[likely]] {
|
||||||
|
--empty_slots;
|
||||||
|
}
|
||||||
|
++full_slots;
|
||||||
|
|
||||||
|
mbs[item_index] = detail::meta_byte_from_hash(hash);
|
||||||
|
|
||||||
|
return item_index;
|
||||||
|
});
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void erase_impl(std::size_t item_index, detail::meta_byte_group&& g)
|
||||||
|
{
|
||||||
|
slots[item_index].value->~value_type();
|
||||||
|
|
||||||
|
--full_slots;
|
||||||
|
if (g.is_any_empty()) {
|
||||||
|
mbs[item_index] = detail::meta_byte::empty;
|
||||||
|
++empty_slots;
|
||||||
|
} else {
|
||||||
|
mbs[item_index] = detail::meta_byte::tombstone;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void grow_and_rehash()
|
||||||
|
{
|
||||||
|
const std::size_t new_group_count{2 * (group_index_mask + 1)};
|
||||||
|
|
||||||
|
pow2_resize(new_group_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pow2_resize(std::size_t new_group_count)
|
||||||
|
{
|
||||||
|
auto iter{begin()};
|
||||||
|
|
||||||
|
const auto old_mbs{std::move(mbs)};
|
||||||
|
const auto old_slots{std::move(slots)};
|
||||||
|
|
||||||
|
initialize_members(new_group_count);
|
||||||
|
|
||||||
|
for (; iter != end(); ++iter) {
|
||||||
|
const std::size_t hash{hasher{}(iter->first)};
|
||||||
|
const std::size_t item_index{find_empty_slot_to_insert(hash)};
|
||||||
|
|
||||||
|
new (&slots[item_index].value) value_type(std::move(iter.slot_ptr->value));
|
||||||
|
iter.slot_ptr->value.~value_type();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void deep_copy(const hmap& other)
|
||||||
|
{
|
||||||
|
initialize_members(other.group_index_mask + 1);
|
||||||
|
|
||||||
|
for (auto iter = other.begin(); iter != other.end(); ++iter) {
|
||||||
|
const std::size_t hash{hasher{}(iter->first)};
|
||||||
|
const std::size_t item_index{find_empty_slot_to_insert(hash)};
|
||||||
|
|
||||||
|
new (&slots[item_index].value) value_type(iter.slot_ptr->value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void initialize_members(std::size_t group_count)
|
||||||
|
{
|
||||||
|
// DEBUG_ASSERT(group_count != 0 && std::ispow2(group_count));
|
||||||
|
|
||||||
|
group_index_mask = group_count - 1;
|
||||||
|
mbs = make_overaligned_unique_ptr_array<group_size, detail::meta_byte>(group_count * group_size + 1);
|
||||||
|
slots = slot_ptr{new slot_type[group_count * group_size]};
|
||||||
|
|
||||||
|
clear_metadata();
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear_metadata()
|
||||||
|
{
|
||||||
|
const std::size_t group_count{group_index_mask + 1};
|
||||||
|
|
||||||
|
empty_slots = group_count * average_max_group_load;
|
||||||
|
full_slots = 0;
|
||||||
|
|
||||||
|
std::memset(mbs.get(), static_cast<int>(detail::meta_byte::empty), group_count * group_size);
|
||||||
|
mbs[group_count * group_size] = detail::meta_byte::end_sentinel;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t group_index_mask;
|
||||||
|
std::size_t empty_slots;
|
||||||
|
std::size_t full_slots;
|
||||||
|
meta_byte_ptr mbs;
|
||||||
|
slot_ptr slots;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mcl
|
549
externals/dynarmic/externals/mcl/include/mcl/container/ihmap.hpp
vendored
Executable file
549
externals/dynarmic/externals/mcl/include/mcl/container/ihmap.hpp
vendored
Executable file
|
@ -0,0 +1,549 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <functional>
|
||||||
|
#include <limits>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "mcl/assert.hpp"
|
||||||
|
#include "mcl/container/detail/meta_byte.hpp"
|
||||||
|
#include "mcl/container/detail/meta_byte_group.hpp"
|
||||||
|
#include "mcl/container/detail/slot_union.hpp"
|
||||||
|
#include "mcl/hash/xmrx.hpp"
|
||||||
|
#include "mcl/hint/assume.hpp"
|
||||||
|
|
||||||
|
namespace mcl {
|
||||||
|
|
||||||
|
template<typename KeyType, typename MappedType, typename Hash, typename Pred>
|
||||||
|
class ihmap;
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
constexpr std::array<meta_byte, 16> ihmap_default_meta{
|
||||||
|
meta_byte::empty, meta_byte::empty, meta_byte::empty, meta_byte::empty,
|
||||||
|
meta_byte::empty, meta_byte::empty, meta_byte::empty, meta_byte::empty,
|
||||||
|
meta_byte::empty, meta_byte::empty, meta_byte::empty, meta_byte::empty,
|
||||||
|
meta_byte::empty, meta_byte::empty, meta_byte::empty, meta_byte::tombstone};
|
||||||
|
|
||||||
|
template<typename KeyType, typename MappedType>
|
||||||
|
struct ihmap_group {
|
||||||
|
using base_value_type = std::pair<const KeyType, MappedType>;
|
||||||
|
using slot_type = detail::slot_union<base_value_type>;
|
||||||
|
|
||||||
|
static constexpr std::size_t group_size{meta_byte_group::max_group_size - 1};
|
||||||
|
|
||||||
|
meta_byte_group meta{ihmap_default_meta};
|
||||||
|
std::array<slot_type, group_size> slots{};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
|
template<bool IsConst, typename KeyType, typename MappedType, typename Hash, typename Pred>
|
||||||
|
class ihmap_iterator {
|
||||||
|
using group_type = detail::ihmap_group<KeyType, MappedType>;
|
||||||
|
using base_value_type = typename group_type::base_value_type;
|
||||||
|
|
||||||
|
public:
|
||||||
|
using key_type = KeyType;
|
||||||
|
using mapped_type = MappedType;
|
||||||
|
using iterator_category = std::forward_iterator_tag;
|
||||||
|
using difference_type = std::ptrdiff_t;
|
||||||
|
using value_type = std::conditional_t<IsConst, std::add_const_t<base_value_type>, base_value_type>;
|
||||||
|
using pointer = value_type*;
|
||||||
|
using const_pointer = const value_type*;
|
||||||
|
using reference = value_type&;
|
||||||
|
using const_reference = const value_type&;
|
||||||
|
|
||||||
|
ihmap_iterator() = default;
|
||||||
|
ihmap_iterator(const ihmap_iterator& other) = default;
|
||||||
|
ihmap_iterator& operator=(const ihmap_iterator& other) = default;
|
||||||
|
|
||||||
|
ihmap_iterator& operator++()
|
||||||
|
{
|
||||||
|
if (group_ptr == nullptr)
|
||||||
|
return *this;
|
||||||
|
|
||||||
|
++slot_index;
|
||||||
|
|
||||||
|
skip_empty_or_tombstone();
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
ihmap_iterator operator++(int)
|
||||||
|
{
|
||||||
|
ihmap_iterator it(*this);
|
||||||
|
++*this;
|
||||||
|
return it;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const ihmap_iterator& other) const
|
||||||
|
{
|
||||||
|
return std::tie(group_ptr, slot_index) == std::tie(other.group_ptr, other.slot_index);
|
||||||
|
}
|
||||||
|
bool operator!=(const ihmap_iterator& other) const
|
||||||
|
{
|
||||||
|
return !operator==(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
reference operator*() const
|
||||||
|
{
|
||||||
|
return static_cast<reference>(group_ptr->slots[slot_index].value);
|
||||||
|
}
|
||||||
|
pointer operator->() const
|
||||||
|
{
|
||||||
|
return std::addressof(operator*());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
friend class ihmap<KeyType, MappedType, Hash, Pred>;
|
||||||
|
|
||||||
|
ihmap_iterator(group_type* group_ptr, size_t slot_index)
|
||||||
|
: group_ptr{group_ptr}, slot_index{slot_index}
|
||||||
|
{
|
||||||
|
ASSUME(group_ptr != nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void skip_empty_or_tombstone()
|
||||||
|
{
|
||||||
|
if (!group_ptr)
|
||||||
|
return;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const detail::meta_byte mb = group_ptr->meta.get(slot_index);
|
||||||
|
if (slot_index == group_type::group_size) {
|
||||||
|
slot_index = 0;
|
||||||
|
++group_ptr;
|
||||||
|
|
||||||
|
if (mb == detail::meta_byte::end_sentinel) {
|
||||||
|
group_ptr = nullptr;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (is_full(mb)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++slot_index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
group_type* group_ptr{nullptr};
|
||||||
|
std::size_t slot_index{0};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename KeyType, typename MappedType, typename Hash = hash::avalanche_xmrx<KeyType>, typename Pred = std::equal_to<KeyType>>
|
||||||
|
class ihmap {
|
||||||
|
using group_type = detail::ihmap_group<KeyType, MappedType>;
|
||||||
|
|
||||||
|
public:
|
||||||
|
using key_type = KeyType;
|
||||||
|
using mapped_type = MappedType;
|
||||||
|
using hasher = Hash;
|
||||||
|
using key_equal = Pred;
|
||||||
|
using value_type = typename group_type::base_value_type;
|
||||||
|
using reference = value_type&;
|
||||||
|
using const_reference = const value_type&;
|
||||||
|
using pointer = value_type*;
|
||||||
|
using const_pointer = const value_type*;
|
||||||
|
using size_type = std::size_t;
|
||||||
|
using difference_type = std::ptrdiff_t;
|
||||||
|
|
||||||
|
using iterator = ihmap_iterator<false, key_type, mapped_type, hasher, key_equal>;
|
||||||
|
using const_iterator = ihmap_iterator<true, key_type, mapped_type, hasher, key_equal>;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static_assert(!std::is_reference_v<key_type>);
|
||||||
|
static_assert(!std::is_reference_v<mapped_type>);
|
||||||
|
|
||||||
|
static constexpr std::size_t group_size{group_type::group_size};
|
||||||
|
static constexpr std::size_t average_max_group_load{group_size - 2};
|
||||||
|
|
||||||
|
struct position {
|
||||||
|
std::size_t group_index;
|
||||||
|
std::size_t slot_index;
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
ihmap()
|
||||||
|
{
|
||||||
|
initialize_members(1);
|
||||||
|
}
|
||||||
|
ihmap(const ihmap& other)
|
||||||
|
{
|
||||||
|
deep_copy(other);
|
||||||
|
}
|
||||||
|
ihmap(ihmap&& other)
|
||||||
|
: group_index_mask{std::exchange(other.group_index_mask, 0)}
|
||||||
|
, empty_slots{std::exchange(other.empty_slots, 0)}
|
||||||
|
, full_slots{std::exchange(other.full_slots, 0)}
|
||||||
|
, groups{std::move(other.groups)}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
ihmap& operator=(const ihmap& other)
|
||||||
|
{
|
||||||
|
deep_copy(other);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
ihmap& operator=(ihmap&& other)
|
||||||
|
{
|
||||||
|
group_index_mask = std::exchange(other.group_index_mask, 0);
|
||||||
|
empty_slots = std::exchange(other.empty_slots, 0);
|
||||||
|
full_slots = std::exchange(other.full_slots, 0);
|
||||||
|
groups = std::move(other.groups);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
~ihmap()
|
||||||
|
{
|
||||||
|
if (!groups)
|
||||||
|
return;
|
||||||
|
|
||||||
|
clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool empty() const noexcept { return full_slots == 0; }
|
||||||
|
size_type size() const noexcept { return full_slots; }
|
||||||
|
size_type max_size() const noexcept { return static_cast<size_type>(std::numeric_limits<difference_type>::max()); }
|
||||||
|
|
||||||
|
iterator begin()
|
||||||
|
{
|
||||||
|
iterator result{iterator_at({0, 0})};
|
||||||
|
result.skip_empty_or_tombstone();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
iterator end()
|
||||||
|
{
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
const_iterator cbegin() const
|
||||||
|
{
|
||||||
|
const_iterator result{const_iterator_at({0, 0})};
|
||||||
|
result.skip_empty_or_tombstone();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
const_iterator cend() const
|
||||||
|
{
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
const_iterator begin() const
|
||||||
|
{
|
||||||
|
return cbegin();
|
||||||
|
}
|
||||||
|
const_iterator end() const
|
||||||
|
{
|
||||||
|
return cend();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename K = key_type, typename... Args>
|
||||||
|
std::pair<iterator, bool> try_emplace(K&& k, Args&&... args)
|
||||||
|
{
|
||||||
|
auto [pos, item_found] = find_key_or_empty_slot(k);
|
||||||
|
if (!item_found) {
|
||||||
|
new (&groups[pos.group_index].slots[pos.slot_index].value) value_type(
|
||||||
|
std::piecewise_construct,
|
||||||
|
std::forward_as_tuple(std::forward<K>(k)),
|
||||||
|
std::forward_as_tuple(std::forward<Args>(args)...));
|
||||||
|
}
|
||||||
|
return {iterator_at(pos), !item_found};
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename K = key_type, typename V = mapped_type>
|
||||||
|
std::pair<iterator, bool> insert_or_assign(K&& k, V&& v)
|
||||||
|
{
|
||||||
|
auto [pos, item_found] = find_key_or_empty_slot(k);
|
||||||
|
if (item_found) {
|
||||||
|
groups[pos.group_index].slots[pos.slot_index].value.second = std::forward<V>(v);
|
||||||
|
} else {
|
||||||
|
new (&groups[pos.group_index].slots[pos.slot_index].value) value_type(
|
||||||
|
std::forward<K>(k),
|
||||||
|
std::forward<V>(v));
|
||||||
|
}
|
||||||
|
return {iterator_at(pos), !item_found};
|
||||||
|
}
|
||||||
|
|
||||||
|
void erase(const_iterator iter)
|
||||||
|
{
|
||||||
|
if (iter == cend()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::size_t group_index{static_cast<std::size_t>(std::distance(groups.get(), iter.group_ptr))};
|
||||||
|
|
||||||
|
erase_impl({group_index, iter.slot_index});
|
||||||
|
}
|
||||||
|
void erase(iterator iter)
|
||||||
|
{
|
||||||
|
if (iter == end()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::size_t group_index{static_cast<std::size_t>(std::distance(groups.get(), iter.group_ptr))};
|
||||||
|
|
||||||
|
erase_impl({group_index, iter.slot_index});
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
std::size_t erase(const K& key)
|
||||||
|
{
|
||||||
|
const std::size_t hash{hasher{}(key)};
|
||||||
|
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
|
||||||
|
|
||||||
|
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const group_type& g{groups[group_index]};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(g.meta.match(mb), {
|
||||||
|
if (key_equal{}(g.slots[match_index].value.first, key)) [[likely]] {
|
||||||
|
erase_impl({group_index, match_index});
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g.meta.is_any_empty()) [[likely]] {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename K = key_type>
|
||||||
|
iterator find(const K& key)
|
||||||
|
{
|
||||||
|
const std::size_t hash{hasher{}(key)};
|
||||||
|
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
|
||||||
|
|
||||||
|
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const group_type& g{groups[group_index]};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(g.meta.match(mb), {
|
||||||
|
if (key_equal{}(g.slots[match_index].value.first, key)) [[likely]] {
|
||||||
|
return iterator_at({group_index, match_index});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g.meta.is_any_empty()) [[likely]] {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
const_iterator find(const K& key) const
|
||||||
|
{
|
||||||
|
const std::size_t hash{hasher{}(key)};
|
||||||
|
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
|
||||||
|
|
||||||
|
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const group_type& g{groups[group_index]};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(g.meta.match(mb), {
|
||||||
|
if (key_equal{}(g.slots[match_index].value.first, key)) [[likely]] {
|
||||||
|
return const_iterator_at({group_index, match_index});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g.meta.is_any_empty()) [[likely]] {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
bool contains(const K& key) const
|
||||||
|
{
|
||||||
|
return find(key) != end();
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
std::size_t count(const K& key) const
|
||||||
|
{
|
||||||
|
return contains(key) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename K = key_type>
|
||||||
|
mapped_type& operator[](K&& k)
|
||||||
|
{
|
||||||
|
return try_emplace(std::forward<K>(k)).first->second;
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
mapped_type& at(K&& k)
|
||||||
|
{
|
||||||
|
const auto iter{find(k)};
|
||||||
|
if (iter == end()) {
|
||||||
|
throw std::out_of_range("ihmap::at: key not found");
|
||||||
|
}
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
template<typename K = key_type>
|
||||||
|
const mapped_type& at(K&& k) const
|
||||||
|
{
|
||||||
|
const auto iter{find(k)};
|
||||||
|
if (iter == end()) {
|
||||||
|
throw std::out_of_range("ihmap::at: key not found");
|
||||||
|
}
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
for (auto iter{begin()}; iter != end(); ++iter) {
|
||||||
|
iter->~value_type();
|
||||||
|
}
|
||||||
|
|
||||||
|
clear_metadata();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
iterator iterator_at(position pos)
|
||||||
|
{
|
||||||
|
return {groups.get() + pos.group_index, pos.slot_index};
|
||||||
|
}
|
||||||
|
const_iterator const_iterator_at(position pos) const
|
||||||
|
{
|
||||||
|
return {groups.get() + pos.group_index, pos.slot_index};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<position, bool> find_key_or_empty_slot(const key_type& key)
|
||||||
|
{
|
||||||
|
const std::size_t hash{hasher{}(key)};
|
||||||
|
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
|
||||||
|
|
||||||
|
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const group_type& g{groups[group_index]};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(g.meta.match(mb), {
|
||||||
|
if (key_equal{}(g.slots[match_index].value.first, key)) [[likely]] {
|
||||||
|
return {{group_index, match_index}, true};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g.meta.is_any_empty()) [[likely]] {
|
||||||
|
return {find_empty_slot_to_insert(hash), false};
|
||||||
|
}
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
position find_empty_slot_to_insert(const std::size_t hash)
|
||||||
|
{
|
||||||
|
if (empty_slots == 0) [[unlikely]] {
|
||||||
|
grow_and_rehash();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
group_type& g{groups[group_index]};
|
||||||
|
|
||||||
|
MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(g.meta.match_empty_or_tombstone(), {
|
||||||
|
if (g.meta.get(match_index) == detail::meta_byte::empty) [[likely]] {
|
||||||
|
--empty_slots;
|
||||||
|
}
|
||||||
|
++full_slots;
|
||||||
|
|
||||||
|
g.meta.set(match_index, detail::meta_byte_from_hash(hash));
|
||||||
|
|
||||||
|
return {group_index, match_index};
|
||||||
|
});
|
||||||
|
|
||||||
|
group_index = (group_index + 1) & group_index_mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void erase_impl(position pos)
|
||||||
|
{
|
||||||
|
group_type& g{groups[pos.group_index]};
|
||||||
|
|
||||||
|
g.slots[pos.slot_index].value.~value_type();
|
||||||
|
|
||||||
|
--full_slots;
|
||||||
|
if (g.meta.is_any_empty()) {
|
||||||
|
g.meta.set(pos.slot_index, detail::meta_byte::empty);
|
||||||
|
++empty_slots;
|
||||||
|
} else {
|
||||||
|
g.meta.set(pos.slot_index, detail::meta_byte::tombstone);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void grow_and_rehash()
|
||||||
|
{
|
||||||
|
const std::size_t new_group_count{2 * (group_index_mask + 1)};
|
||||||
|
|
||||||
|
pow2_resize(new_group_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pow2_resize(std::size_t new_group_count)
|
||||||
|
{
|
||||||
|
auto iter{begin()};
|
||||||
|
|
||||||
|
const auto old_groups{std::move(groups)};
|
||||||
|
|
||||||
|
initialize_members(new_group_count);
|
||||||
|
|
||||||
|
for (; iter != end(); ++iter) {
|
||||||
|
const std::size_t hash{hasher{}(iter->first)};
|
||||||
|
const position pos{find_empty_slot_to_insert(hash)};
|
||||||
|
|
||||||
|
new (&groups[pos.group_index].slots[pos.slot_index].value) value_type(std::move(iter.group_ptr->slots[iter.slot_index].value));
|
||||||
|
iter.group_ptr->slots[iter.slot_index].value.~value_type();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void deep_copy(const ihmap& other)
|
||||||
|
{
|
||||||
|
initialize_members(other.group_index_mask + 1);
|
||||||
|
|
||||||
|
for (auto iter = other.begin(); iter != other.end(); ++iter) {
|
||||||
|
const std::size_t hash{hasher{}(iter->first)};
|
||||||
|
const position pos{find_empty_slot_to_insert(hash)};
|
||||||
|
|
||||||
|
new (&groups[pos.group_index].slots[pos.slot_index].value) value_type(iter.group_ptr->slots[iter.slot_index].value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void initialize_members(std::size_t group_count)
|
||||||
|
{
|
||||||
|
// DEBUG_ASSERT(group_count != 0 && std::ispow2(group_count));
|
||||||
|
|
||||||
|
group_index_mask = group_count - 1;
|
||||||
|
groups = std::unique_ptr<group_type[]>{new group_type[group_count]};
|
||||||
|
|
||||||
|
clear_metadata();
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear_metadata()
|
||||||
|
{
|
||||||
|
const std::size_t group_count{group_index_mask + 1};
|
||||||
|
|
||||||
|
empty_slots = group_count * average_max_group_load;
|
||||||
|
full_slots = 0;
|
||||||
|
|
||||||
|
for (size_t i{0}; i < group_count; ++i) {
|
||||||
|
groups[i].meta = detail::meta_byte_group{detail::ihmap_default_meta};
|
||||||
|
}
|
||||||
|
groups[group_count - 1].meta.set(group_size, detail::meta_byte::end_sentinel);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t group_index_mask;
|
||||||
|
std::size_t empty_slots;
|
||||||
|
std::size_t full_slots;
|
||||||
|
std::unique_ptr<group_type[]> groups;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mcl
|
|
@ -21,7 +21,8 @@ class intrusive_list_iterator;
|
||||||
template<typename T>
|
template<typename T>
|
||||||
class intrusive_list_node {
|
class intrusive_list_node {
|
||||||
public:
|
public:
|
||||||
bool is_sentinel() const {
|
bool is_sentinel() const
|
||||||
|
{
|
||||||
return is_sentinel_;
|
return is_sentinel_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,7 +43,8 @@ class intrusive_list_sentinel final : public intrusive_list_node<T> {
|
||||||
using intrusive_list_node<T>::is_sentinel_;
|
using intrusive_list_node<T>::is_sentinel_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
intrusive_list_sentinel() {
|
intrusive_list_sentinel()
|
||||||
|
{
|
||||||
next = this;
|
next = this;
|
||||||
prev = this;
|
prev = this;
|
||||||
is_sentinel_ = true;
|
is_sentinel_ = true;
|
||||||
|
@ -72,50 +74,56 @@ public:
|
||||||
intrusive_list_iterator& operator=(const intrusive_list_iterator& other) = default;
|
intrusive_list_iterator& operator=(const intrusive_list_iterator& other) = default;
|
||||||
|
|
||||||
explicit intrusive_list_iterator(node_pointer list_node)
|
explicit intrusive_list_iterator(node_pointer list_node)
|
||||||
: node(list_node) {
|
: node(list_node) {}
|
||||||
}
|
|
||||||
explicit intrusive_list_iterator(pointer data)
|
explicit intrusive_list_iterator(pointer data)
|
||||||
: node(data) {
|
: node(data) {}
|
||||||
}
|
|
||||||
explicit intrusive_list_iterator(reference data)
|
explicit intrusive_list_iterator(reference data)
|
||||||
: node(&data) {
|
: node(&data) {}
|
||||||
}
|
|
||||||
|
|
||||||
intrusive_list_iterator& operator++() {
|
intrusive_list_iterator& operator++()
|
||||||
|
{
|
||||||
node = node->next;
|
node = node->next;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
intrusive_list_iterator& operator--() {
|
intrusive_list_iterator& operator--()
|
||||||
|
{
|
||||||
node = node->prev;
|
node = node->prev;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
intrusive_list_iterator operator++(int) {
|
intrusive_list_iterator operator++(int)
|
||||||
|
{
|
||||||
intrusive_list_iterator it(*this);
|
intrusive_list_iterator it(*this);
|
||||||
++*this;
|
++*this;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
intrusive_list_iterator operator--(int) {
|
intrusive_list_iterator operator--(int)
|
||||||
|
{
|
||||||
intrusive_list_iterator it(*this);
|
intrusive_list_iterator it(*this);
|
||||||
--*this;
|
--*this;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator==(const intrusive_list_iterator& other) const {
|
bool operator==(const intrusive_list_iterator& other) const
|
||||||
|
{
|
||||||
return node == other.node;
|
return node == other.node;
|
||||||
}
|
}
|
||||||
bool operator!=(const intrusive_list_iterator& other) const {
|
bool operator!=(const intrusive_list_iterator& other) const
|
||||||
|
{
|
||||||
return !operator==(other);
|
return !operator==(other);
|
||||||
}
|
}
|
||||||
|
|
||||||
reference operator*() const {
|
reference operator*() const
|
||||||
|
{
|
||||||
DEBUG_ASSERT(!node->is_sentinel());
|
DEBUG_ASSERT(!node->is_sentinel());
|
||||||
return static_cast<reference>(*node);
|
return static_cast<reference>(*node);
|
||||||
}
|
}
|
||||||
pointer operator->() const {
|
pointer operator->() const
|
||||||
|
{
|
||||||
return std::addressof(operator*());
|
return std::addressof(operator*());
|
||||||
}
|
}
|
||||||
|
|
||||||
node_pointer AsNodePointer() const {
|
node_pointer AsNodePointer() const
|
||||||
|
{
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -145,7 +153,8 @@ public:
|
||||||
* @param location The location to insert the node.
|
* @param location The location to insert the node.
|
||||||
* @param new_node The node to add.
|
* @param new_node The node to add.
|
||||||
*/
|
*/
|
||||||
iterator insert(iterator location, pointer new_node) {
|
iterator insert(iterator location, pointer new_node)
|
||||||
|
{
|
||||||
return insert_before(location, new_node);
|
return insert_before(location, new_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,7 +165,8 @@ public:
|
||||||
* @param location The location to insert the new node.
|
* @param location The location to insert the new node.
|
||||||
* @param new_node The node to insert into the list.
|
* @param new_node The node to insert into the list.
|
||||||
*/
|
*/
|
||||||
iterator insert_before(iterator location, pointer new_node) {
|
iterator insert_before(iterator location, pointer new_node)
|
||||||
|
{
|
||||||
auto existing_node = location.AsNodePointer();
|
auto existing_node = location.AsNodePointer();
|
||||||
|
|
||||||
new_node->next = existing_node;
|
new_node->next = existing_node;
|
||||||
|
@ -173,7 +183,8 @@ public:
|
||||||
* @param position Location to insert the node in front of.
|
* @param position Location to insert the node in front of.
|
||||||
* @param new_node The node to be inserted into the list.
|
* @param new_node The node to be inserted into the list.
|
||||||
*/
|
*/
|
||||||
iterator insert_after(iterator position, pointer new_node) {
|
iterator insert_after(iterator position, pointer new_node)
|
||||||
|
{
|
||||||
if (empty())
|
if (empty())
|
||||||
return insert(begin(), new_node);
|
return insert(begin(), new_node);
|
||||||
|
|
||||||
|
@ -184,7 +195,8 @@ public:
|
||||||
* Add an entry to the start of the list.
|
* Add an entry to the start of the list.
|
||||||
* @param node Node to add to the list.
|
* @param node Node to add to the list.
|
||||||
*/
|
*/
|
||||||
void push_front(pointer node) {
|
void push_front(pointer node)
|
||||||
|
{
|
||||||
insert(begin(), node);
|
insert(begin(), node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,7 +204,8 @@ public:
|
||||||
* Add an entry to the end of the list
|
* Add an entry to the end of the list
|
||||||
* @param node Node to add to the list.
|
* @param node Node to add to the list.
|
||||||
*/
|
*/
|
||||||
void push_back(pointer node) {
|
void push_back(pointer node)
|
||||||
|
{
|
||||||
insert(end(), node);
|
insert(end(), node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -200,7 +213,8 @@ public:
|
||||||
* Erases the node at the front of the list.
|
* Erases the node at the front of the list.
|
||||||
* @note Must not be called on an empty list.
|
* @note Must not be called on an empty list.
|
||||||
*/
|
*/
|
||||||
void pop_front() {
|
void pop_front()
|
||||||
|
{
|
||||||
DEBUG_ASSERT(!empty());
|
DEBUG_ASSERT(!empty());
|
||||||
erase(begin());
|
erase(begin());
|
||||||
}
|
}
|
||||||
|
@ -209,7 +223,8 @@ public:
|
||||||
* Erases the node at the back of the list.
|
* Erases the node at the back of the list.
|
||||||
* @note Must not be called on an empty list.
|
* @note Must not be called on an empty list.
|
||||||
*/
|
*/
|
||||||
void pop_back() {
|
void pop_back()
|
||||||
|
{
|
||||||
DEBUG_ASSERT(!empty());
|
DEBUG_ASSERT(!empty());
|
||||||
erase(--end());
|
erase(--end());
|
||||||
}
|
}
|
||||||
|
@ -218,7 +233,8 @@ public:
|
||||||
* Removes a node from this list
|
* Removes a node from this list
|
||||||
* @param it An iterator that points to the node to remove from list.
|
* @param it An iterator that points to the node to remove from list.
|
||||||
*/
|
*/
|
||||||
pointer remove(iterator& it) {
|
pointer remove(iterator& it)
|
||||||
|
{
|
||||||
DEBUG_ASSERT(it != end());
|
DEBUG_ASSERT(it != end());
|
||||||
|
|
||||||
pointer node = &*it++;
|
pointer node = &*it++;
|
||||||
|
@ -237,7 +253,8 @@ public:
|
||||||
* Removes a node from this list
|
* Removes a node from this list
|
||||||
* @param it A constant iterator that points to the node to remove from list.
|
* @param it A constant iterator that points to the node to remove from list.
|
||||||
*/
|
*/
|
||||||
pointer remove(const iterator& it) {
|
pointer remove(const iterator& it)
|
||||||
|
{
|
||||||
iterator copy = it;
|
iterator copy = it;
|
||||||
return remove(copy);
|
return remove(copy);
|
||||||
}
|
}
|
||||||
|
@ -246,7 +263,8 @@ public:
|
||||||
* Removes a node from this list.
|
* Removes a node from this list.
|
||||||
* @param node A pointer to the node to remove.
|
* @param node A pointer to the node to remove.
|
||||||
*/
|
*/
|
||||||
pointer remove(pointer node) {
|
pointer remove(pointer node)
|
||||||
|
{
|
||||||
return remove(iterator(node));
|
return remove(iterator(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -254,7 +272,8 @@ public:
|
||||||
* Removes a node from this list.
|
* Removes a node from this list.
|
||||||
* @param node A reference to the node to remove.
|
* @param node A reference to the node to remove.
|
||||||
*/
|
*/
|
||||||
pointer remove(reference node) {
|
pointer remove(reference node)
|
||||||
|
{
|
||||||
return remove(iterator(node));
|
return remove(iterator(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -262,7 +281,8 @@ public:
|
||||||
* Is this list empty?
|
* Is this list empty?
|
||||||
* @returns true if there are no nodes in this list.
|
* @returns true if there are no nodes in this list.
|
||||||
*/
|
*/
|
||||||
bool empty() const {
|
bool empty() const
|
||||||
|
{
|
||||||
return root->next == root.get();
|
return root->next == root.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,7 +290,8 @@ public:
|
||||||
* Gets the total number of elements within this list.
|
* Gets the total number of elements within this list.
|
||||||
* @return the number of elements in this list.
|
* @return the number of elements in this list.
|
||||||
*/
|
*/
|
||||||
size_type size() const {
|
size_type size() const
|
||||||
|
{
|
||||||
return static_cast<size_type>(std::distance(begin(), end()));
|
return static_cast<size_type>(std::distance(begin(), end()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -278,7 +299,8 @@ public:
|
||||||
* Retrieves a reference to the node at the front of the list.
|
* Retrieves a reference to the node at the front of the list.
|
||||||
* @note Must not be called on an empty list.
|
* @note Must not be called on an empty list.
|
||||||
*/
|
*/
|
||||||
reference front() {
|
reference front()
|
||||||
|
{
|
||||||
DEBUG_ASSERT(!empty());
|
DEBUG_ASSERT(!empty());
|
||||||
return *begin();
|
return *begin();
|
||||||
}
|
}
|
||||||
|
@ -287,7 +309,8 @@ public:
|
||||||
* Retrieves a constant reference to the node at the front of the list.
|
* Retrieves a constant reference to the node at the front of the list.
|
||||||
* @note Must not be called on an empty list.
|
* @note Must not be called on an empty list.
|
||||||
*/
|
*/
|
||||||
const_reference front() const {
|
const_reference front() const
|
||||||
|
{
|
||||||
DEBUG_ASSERT(!empty());
|
DEBUG_ASSERT(!empty());
|
||||||
return *begin();
|
return *begin();
|
||||||
}
|
}
|
||||||
|
@ -296,7 +319,8 @@ public:
|
||||||
* Retrieves a reference to the node at the back of the list.
|
* Retrieves a reference to the node at the back of the list.
|
||||||
* @note Must not be called on an empty list.
|
* @note Must not be called on an empty list.
|
||||||
*/
|
*/
|
||||||
reference back() {
|
reference back()
|
||||||
|
{
|
||||||
DEBUG_ASSERT(!empty());
|
DEBUG_ASSERT(!empty());
|
||||||
return *--end();
|
return *--end();
|
||||||
}
|
}
|
||||||
|
@ -305,7 +329,8 @@ public:
|
||||||
* Retrieves a constant reference to the node at the back of the list.
|
* Retrieves a constant reference to the node at the back of the list.
|
||||||
* @note Must not be called on an empty list.
|
* @note Must not be called on an empty list.
|
||||||
*/
|
*/
|
||||||
const_reference back() const {
|
const_reference back() const
|
||||||
|
{
|
||||||
DEBUG_ASSERT(!empty());
|
DEBUG_ASSERT(!empty());
|
||||||
return *--end();
|
return *--end();
|
||||||
}
|
}
|
||||||
|
@ -331,7 +356,8 @@ public:
|
||||||
* Erases a node from the list, indicated by an iterator.
|
* Erases a node from the list, indicated by an iterator.
|
||||||
* @param it The iterator that points to the node to erase.
|
* @param it The iterator that points to the node to erase.
|
||||||
*/
|
*/
|
||||||
iterator erase(iterator it) {
|
iterator erase(iterator it)
|
||||||
|
{
|
||||||
remove(it);
|
remove(it);
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
@ -340,7 +366,8 @@ public:
|
||||||
* Erases a node from this list.
|
* Erases a node from this list.
|
||||||
* @param node A pointer to the node to erase from this list.
|
* @param node A pointer to the node to erase from this list.
|
||||||
*/
|
*/
|
||||||
iterator erase(pointer node) {
|
iterator erase(pointer node)
|
||||||
|
{
|
||||||
return erase(iterator(node));
|
return erase(iterator(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -348,7 +375,8 @@ public:
|
||||||
* Erases a node from this list.
|
* Erases a node from this list.
|
||||||
* @param node A reference to the node to erase from this list.
|
* @param node A reference to the node to erase from this list.
|
||||||
*/
|
*/
|
||||||
iterator erase(reference node) {
|
iterator erase(reference node)
|
||||||
|
{
|
||||||
return erase(iterator(node));
|
return erase(iterator(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -356,7 +384,8 @@ public:
|
||||||
* Exchanges contents of this list with another list instance.
|
* Exchanges contents of this list with another list instance.
|
||||||
* @param other The other list to swap with.
|
* @param other The other list to swap with.
|
||||||
*/
|
*/
|
||||||
void swap(intrusive_list& other) noexcept {
|
void swap(intrusive_list& other) noexcept
|
||||||
|
{
|
||||||
root.swap(other.root);
|
root.swap(other.root);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -371,7 +400,8 @@ private:
|
||||||
* @param rhs The second list.
|
* @param rhs The second list.
|
||||||
*/
|
*/
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void swap(intrusive_list<T>& lhs, intrusive_list<T>& rhs) noexcept {
|
void swap(intrusive_list<T>& lhs, intrusive_list<T>& rhs) noexcept
|
||||||
|
{
|
||||||
lhs.swap(rhs);
|
lhs.swap(rhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
32
externals/dynarmic/externals/mcl/include/mcl/hash/xmrx.hpp
vendored
Executable file
32
externals/dynarmic/externals/mcl/include/mcl/hash/xmrx.hpp
vendored
Executable file
|
@ -0,0 +1,32 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
// Reference: http://jonkagstrom.com/bit-mixer-construction/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
#include "mcl/bit/rotate.hpp"
|
||||||
|
#include "mcl/stdint.hpp"
|
||||||
|
|
||||||
|
namespace mcl::hash {
|
||||||
|
|
||||||
|
constexpr size_t xmrx(size_t x)
|
||||||
|
{
|
||||||
|
x ^= x >> 32;
|
||||||
|
x *= 0xff51afd7ed558ccd;
|
||||||
|
x ^= bit::rotate_right(x, 47) ^ bit::rotate_right(x, 23);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct avalanche_xmrx {
|
||||||
|
size_t operator()(const T& value)
|
||||||
|
{
|
||||||
|
return xmrx(std::hash<T>{}(value));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mcl::hash
|
|
@ -13,12 +13,14 @@ template<typename T>
|
||||||
struct reverse_adapter {
|
struct reverse_adapter {
|
||||||
T& iterable;
|
T& iterable;
|
||||||
|
|
||||||
constexpr auto begin() {
|
constexpr auto begin()
|
||||||
|
{
|
||||||
using namespace std;
|
using namespace std;
|
||||||
return rbegin(iterable);
|
return rbegin(iterable);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr auto end() {
|
constexpr auto end()
|
||||||
|
{
|
||||||
using namespace std;
|
using namespace std;
|
||||||
return rend(iterable);
|
return rend(iterable);
|
||||||
}
|
}
|
||||||
|
@ -27,7 +29,8 @@ struct reverse_adapter {
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
constexpr detail::reverse_adapter<T> reverse(T&& iterable) {
|
constexpr detail::reverse_adapter<T> reverse(T&& iterable)
|
||||||
|
{
|
||||||
return detail::reverse_adapter<T>{iterable};
|
return detail::reverse_adapter<T>{iterable};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
46
externals/dynarmic/externals/mcl/include/mcl/memory/overaligned_unique_ptr.hpp
vendored
Executable file
46
externals/dynarmic/externals/mcl/include/mcl/memory/overaligned_unique_ptr.hpp
vendored
Executable file
|
@ -0,0 +1,46 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>  // size_t
#include <memory>   // std::unique_ptr
#include <type_traits>

#ifdef _MSC_VER
#    include <malloc.h>
#else
#    include <cstdlib>
#endif

namespace mcl {

namespace detail {
/// Deleter for memory obtained from make_overaligned_unique_ptr_array():
/// releases it with _aligned_free on MSVC and std::free everywhere else,
/// matching the allocation function used on each platform.
struct aligned_alloc_deleter {
    template<typename T>
    void operator()(T* p) const
    {
#ifdef _MSC_VER
        _aligned_free(const_cast<std::remove_const_t<T>*>(p));
#else
        std::free(const_cast<std::remove_const_t<T>*>(p));
#endif
    }
};
}  // namespace detail

/// unique_ptr whose storage is released by detail::aligned_alloc_deleter.
/// The first template argument (the alignment) is not used by the alias itself.
template<size_t, typename T>
using overaligned_unique_ptr = std::unique_ptr<T, detail::aligned_alloc_deleter>;

/// Allocates raw storage for `element_count` objects of type T, aligned to
/// `alignment` bytes, and returns it as an overaligned_unique_ptr<alignment, T[]>.
///
/// The byte count is rounded up to a multiple of `alignment` before allocating.
/// No constructors are run on the storage, and the returned pointer is null if
/// the underlying allocation function returns null.
template<size_t alignment, typename T>
auto make_overaligned_unique_ptr_array(size_t element_count)
{
    const size_t min_size = element_count * sizeof(T);
    const size_t alloc_size = (min_size + alignment - 1) / alignment * alignment;
#ifdef _MSC_VER
    return overaligned_unique_ptr<alignment, T[]>{static_cast<T*>(_aligned_malloc(alloc_size, alignment))};
#else
    return overaligned_unique_ptr<alignment, T[]>{static_cast<T*>(std::aligned_alloc(alignment, alloc_size))};
#endif
}

}  // namespace mcl
|
|
@ -21,7 +21,9 @@ class scope_exit final {
|
||||||
public:
|
public:
|
||||||
explicit scope_exit(Function&& fn)
|
explicit scope_exit(Function&& fn)
|
||||||
: function(std::move(fn)) {}
|
: function(std::move(fn)) {}
|
||||||
~scope_exit() noexcept {
|
|
||||||
|
~scope_exit() noexcept
|
||||||
|
{
|
||||||
function();
|
function();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,7 +36,9 @@ class scope_fail final {
|
||||||
public:
|
public:
|
||||||
explicit scope_fail(Function&& fn)
|
explicit scope_fail(Function&& fn)
|
||||||
: function(std::move(fn)), exception_count(std::uncaught_exceptions()) {}
|
: function(std::move(fn)), exception_count(std::uncaught_exceptions()) {}
|
||||||
~scope_fail() noexcept {
|
|
||||||
|
~scope_fail() noexcept
|
||||||
|
{
|
||||||
if (std::uncaught_exceptions() > exception_count) {
|
if (std::uncaught_exceptions() > exception_count) {
|
||||||
function();
|
function();
|
||||||
}
|
}
|
||||||
|
@ -50,7 +54,9 @@ class scope_success final {
|
||||||
public:
|
public:
|
||||||
explicit scope_success(Function&& fn)
|
explicit scope_success(Function&& fn)
|
||||||
: function(std::move(fn)), exception_count(std::uncaught_exceptions()) {}
|
: function(std::move(fn)), exception_count(std::uncaught_exceptions()) {}
|
||||||
~scope_success() {
|
|
||||||
|
~scope_success()
|
||||||
|
{
|
||||||
if (std::uncaught_exceptions() <= exception_count) {
|
if (std::uncaught_exceptions() <= exception_count) {
|
||||||
function();
|
function();
|
||||||
}
|
}
|
||||||
|
@ -64,17 +70,20 @@ private:
|
||||||
// We use ->* here as it has the highest precedence of the operators we can use.
|
// We use ->* here as it has the highest precedence of the operators we can use.
|
||||||
|
|
||||||
template<typename Function>
|
template<typename Function>
|
||||||
auto operator->*(scope_exit_tag, Function&& function) {
|
auto operator->*(scope_exit_tag, Function&& function)
|
||||||
|
{
|
||||||
return scope_exit<std::decay_t<Function>>{std::forward<Function>(function)};
|
return scope_exit<std::decay_t<Function>>{std::forward<Function>(function)};
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Function>
|
template<typename Function>
|
||||||
auto operator->*(scope_fail_tag, Function&& function) {
|
auto operator->*(scope_fail_tag, Function&& function)
|
||||||
|
{
|
||||||
return scope_fail<std::decay_t<Function>>{std::forward<Function>(function)};
|
return scope_fail<std::decay_t<Function>>{std::forward<Function>(function)};
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Function>
|
template<typename Function>
|
||||||
auto operator->*(scope_success_tag, Function&& function) {
|
auto operator->*(scope_success_tag, Function&& function)
|
||||||
|
{
|
||||||
return scope_success<std::decay_t<Function>>{std::forward<Function>(function)};
|
return scope_success<std::decay_t<Function>>{std::forward<Function>(function)};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,8 +11,9 @@
|
||||||
|
|
||||||
namespace mcl::detail {
|
namespace mcl::detail {
|
||||||
|
|
||||||
[[noreturn]] void assert_terminate_impl(fmt::string_view msg, fmt::format_args args) {
|
[[noreturn]] void assert_terminate_impl(const char* expr_str, fmt::string_view msg, fmt::format_args args)
|
||||||
fmt::print(stderr, "assertion failed: ");
|
{
|
||||||
|
fmt::print(stderr, "assertion failed: {}\nMessage:", expr_str);
|
||||||
fmt::vprint(stderr, msg, args);
|
fmt::vprint(stderr, msg, args);
|
||||||
std::fflush(stderr);
|
std::fflush(stderr);
|
||||||
std::terminate();
|
std::terminate();
|
||||||
|
|
|
@ -1,13 +1,14 @@
|
||||||
add_executable(mcl-tests
|
add_executable(mcl-tests
|
||||||
bit/bit_field_tests.cpp
|
bit/bit_field_tests.cpp
|
||||||
main.cpp
|
container/hmap.cpp
|
||||||
|
container/ihmap.cpp
|
||||||
mp/metavalue_tests.cpp
|
mp/metavalue_tests.cpp
|
||||||
mp/typelist_tests.cpp
|
mp/typelist_tests.cpp
|
||||||
type_traits/type_traits_tests.cpp
|
type_traits/type_traits_tests.cpp
|
||||||
)
|
)
|
||||||
target_include_directories(mcl-tests PUBLIC .)
|
target_include_directories(mcl-tests PUBLIC .)
|
||||||
target_compile_options(mcl-tests PRIVATE ${STAMINA_CXX_FLAGS})
|
target_compile_options(mcl-tests PRIVATE ${STAMINA_CXX_FLAGS})
|
||||||
target_link_libraries(mcl-tests PRIVATE Catch2::Catch2 mcl)
|
target_link_libraries(mcl-tests PRIVATE Catch2::Catch2WithMain mcl)
|
||||||
|
|
||||||
include(CTest)
|
include(CTest)
|
||||||
include(Catch)
|
include(Catch)
|
||||||
|
|
|
@ -5,11 +5,12 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
#include <catch2/catch.hpp>
|
#include <catch2/catch_test_macros.hpp>
|
||||||
#include <mcl/bit/bit_field.hpp>
|
#include <mcl/bit/bit_field.hpp>
|
||||||
#include <mcl/stdint.hpp>
|
#include <mcl/stdint.hpp>
|
||||||
|
|
||||||
TEST_CASE("mcl::bit::ones", "[bit]") {
|
TEST_CASE("mcl::bit::ones", "[bit]")
|
||||||
|
{
|
||||||
const std::array cases{
|
const std::array cases{
|
||||||
std::make_tuple<size_t, u8>(0, 0x00),
|
std::make_tuple<size_t, u8>(0, 0x00),
|
||||||
std::make_tuple<size_t, u8>(1, 0x01),
|
std::make_tuple<size_t, u8>(1, 0x01),
|
||||||
|
|
66
externals/dynarmic/externals/mcl/tests/container/hmap.cpp
vendored
Executable file
66
externals/dynarmic/externals/mcl/tests/container/hmap.cpp
vendored
Executable file
|
@ -0,0 +1,66 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include <catch2/catch_test_macros.hpp>
|
||||||
|
#include <fmt/core.h>
|
||||||
|
#include <mcl/container/hmap.hpp>
|
||||||
|
#include <mcl/stdint.hpp>
|
||||||
|
|
||||||
|
// Exercises insertion, lookup, iteration, try_emplace and clear on mcl::hmap.
TEST_CASE("mcl::hmap", "[hmap]")
{
    mcl::hmap<u64, u64> double_map;

    // Unsigned, like the map's key and value types, so that none of the
    // comparisons below mix signed and unsigned operands.
    constexpr u64 count = 100000;

    REQUIRE(double_map.empty());

    // operator[] inserts a new entry each time; size grows by one per insert.
    for (u64 i = 0; i < count; ++i) {
        double_map[i] = i * 2;
        REQUIRE(double_map.size() == i + 1);
    }

    for (u64 i = 0; i < count; ++i) {
        REQUIRE(double_map[i] == i * 2);
        REQUIRE(double_map.contains(i));
    }

    for (u64 i = 0; i < count; ++i) {
        auto iter = double_map.find(i);
        // Fail the test cleanly instead of dereferencing a past-the-end iterator.
        REQUIRE(iter != double_map.end());
        REQUIRE(iter->first == i);
        REQUIRE(iter->second == i * 2);
    }

    // Keys that were never inserted must not be reported as present.
    for (u64 i = count; i < count * 2; ++i) {
        REQUIRE(!double_map.contains(i));
    }

    // try_emplace on an existing key must report that nothing was inserted.
    for (u64 i = 0; i < count; ++i) {
        auto result = double_map.try_emplace(i, 0);
        REQUIRE(!result.second);
    }

    for (auto [k, v] : double_map) {
        REQUIRE(k * 2 == v);
    }

    // Iteration must visit every key exactly once.
    std::unordered_map<u64, size_t> indexes_count;
    for (auto [k, v] : double_map) {
        (void)v;
        indexes_count[k]++;
    }
    for (auto [k, v] : indexes_count) {
        (void)k;
        REQUIRE(v == 1);
    }

    REQUIRE(!double_map.empty());
    double_map.clear();
    REQUIRE(double_map.empty());

    // Iterating a cleared map must not visit anything.
    for (auto [k, v] : double_map) {
        (void)k;
        (void)v;
        REQUIRE(false);
    }
}
|
66
externals/dynarmic/externals/mcl/tests/container/ihmap.cpp
vendored
Executable file
66
externals/dynarmic/externals/mcl/tests/container/ihmap.cpp
vendored
Executable file
|
@ -0,0 +1,66 @@
|
||||||
|
// This file is part of the mcl project.
|
||||||
|
// Copyright (c) 2022 merryhime
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include <catch2/catch_test_macros.hpp>
|
||||||
|
#include <fmt/core.h>
|
||||||
|
#include <mcl/container/ihmap.hpp>
|
||||||
|
#include <mcl/stdint.hpp>
|
||||||
|
|
||||||
|
// Exercises insertion, lookup, iteration, try_emplace and clear on mcl::ihmap.
TEST_CASE("mcl::ihmap", "[ihmap]")
{
    mcl::ihmap<u64, u64> double_map;

    // Unsigned, like the map's key and value types, so that none of the
    // comparisons below mix signed and unsigned operands.
    constexpr u64 count = 100000;

    REQUIRE(double_map.empty());

    // operator[] inserts a new entry each time; size grows by one per insert.
    for (u64 i = 0; i < count; ++i) {
        double_map[i] = i * 2;
        REQUIRE(double_map.size() == i + 1);
    }

    for (u64 i = 0; i < count; ++i) {
        REQUIRE(double_map[i] == i * 2);
        REQUIRE(double_map.contains(i));
    }

    for (u64 i = 0; i < count; ++i) {
        auto iter = double_map.find(i);
        // Fail the test cleanly instead of dereferencing a past-the-end iterator.
        REQUIRE(iter != double_map.end());
        REQUIRE(iter->first == i);
        REQUIRE(iter->second == i * 2);
    }

    // Keys that were never inserted must not be reported as present.
    for (u64 i = count; i < count * 2; ++i) {
        REQUIRE(!double_map.contains(i));
    }

    // try_emplace on an existing key must report that nothing was inserted.
    for (u64 i = 0; i < count; ++i) {
        auto result = double_map.try_emplace(i, 0);
        REQUIRE(!result.second);
    }

    for (auto [k, v] : double_map) {
        REQUIRE(k * 2 == v);
    }

    // Iteration must visit every key exactly once.
    std::unordered_map<u64, size_t> indexes_count;
    for (auto [k, v] : double_map) {
        (void)v;
        indexes_count[k]++;
    }
    for (auto [k, v] : indexes_count) {
        (void)k;
        REQUIRE(v == 1);
    }

    REQUIRE(!double_map.empty());
    double_map.clear();
    REQUIRE(double_map.empty());

    // Iterating a cleared map must not visit anything.
    for (auto [k, v] : double_map) {
        (void)k;
        (void)v;
        REQUIRE(false);
    }
}
|
218
externals/dynarmic/externals/oaknut/.clang-format
vendored
Executable file
218
externals/dynarmic/externals/oaknut/.clang-format
vendored
Executable file
|
@ -0,0 +1,218 @@
|
||||||
|
---
|
||||||
|
Language: Cpp
|
||||||
|
AccessModifierOffset: -4
|
||||||
|
AlignAfterOpenBracket: Align
|
||||||
|
AlignConsecutiveMacros: None
|
||||||
|
AlignConsecutiveAssignments: None
|
||||||
|
AlignConsecutiveBitFields: None
|
||||||
|
AlignConsecutiveDeclarations: None
|
||||||
|
# (duplicate "AlignConsecutiveMacros: None" removed: the key is already set earlier in this file, and duplicated mapping keys are rejected by stricter YAML parsers)
|
||||||
|
AlignEscapedNewlines: Right
|
||||||
|
AlignOperands: AlignAfterOperator
|
||||||
|
AlignTrailingComments: true
|
||||||
|
AllowAllArgumentsOnNextLine: true
|
||||||
|
AllowAllConstructorInitializersOnNextLine: true
|
||||||
|
AllowAllParametersOfDeclarationOnNextLine: true
|
||||||
|
AllowShortEnumsOnASingleLine: true
|
||||||
|
AllowShortBlocksOnASingleLine: Empty
|
||||||
|
AllowShortCaseLabelsOnASingleLine: false
|
||||||
|
AllowShortFunctionsOnASingleLine: Inline
|
||||||
|
AllowShortLambdasOnASingleLine: All
|
||||||
|
AllowShortIfStatementsOnASingleLine: Never
|
||||||
|
AllowShortLoopsOnASingleLine: false
|
||||||
|
AlwaysBreakAfterDefinitionReturnType: None
|
||||||
|
AlwaysBreakAfterReturnType: None
|
||||||
|
AlwaysBreakBeforeMultilineStrings: true
|
||||||
|
AlwaysBreakTemplateDeclarations: Yes
|
||||||
|
AttributeMacros:
|
||||||
|
- __capability
|
||||||
|
BinPackArguments: true
|
||||||
|
BinPackParameters: false
|
||||||
|
BitFieldColonSpacing: Both
|
||||||
|
BraceWrapping:
|
||||||
|
AfterCaseLabel: false
|
||||||
|
AfterClass: false
|
||||||
|
AfterControlStatement: Never
|
||||||
|
AfterEnum: false
|
||||||
|
AfterFunction: true
|
||||||
|
AfterNamespace: false
|
||||||
|
AfterObjCDeclaration: false
|
||||||
|
AfterStruct: false
|
||||||
|
AfterUnion: false
|
||||||
|
AfterExternBlock: false
|
||||||
|
BeforeCatch: false
|
||||||
|
BeforeElse: false
|
||||||
|
BeforeLambdaBody: false
|
||||||
|
BeforeWhile: false
|
||||||
|
IndentBraces: false
|
||||||
|
SplitEmptyFunction: false
|
||||||
|
SplitEmptyRecord: false
|
||||||
|
SplitEmptyNamespace: false
|
||||||
|
BreakBeforeBinaryOperators: All
|
||||||
|
BreakBeforeBraces: Custom
|
||||||
|
BreakBeforeConceptDeclarations: true
|
||||||
|
BreakBeforeTernaryOperators: true
|
||||||
|
BreakBeforeInheritanceComma: false
|
||||||
|
BreakConstructorInitializersBeforeComma: true
|
||||||
|
BreakConstructorInitializers: BeforeComma
|
||||||
|
BreakInheritanceList: BeforeComma
|
||||||
|
BreakAfterJavaFieldAnnotations: false
|
||||||
|
BreakStringLiterals: true
|
||||||
|
ColumnLimit: 0
|
||||||
|
CommentPragmas: '^ IWYU pragma:'
|
||||||
|
CompactNamespaces: false
|
||||||
|
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||||
|
ConstructorInitializerIndentWidth: 4
|
||||||
|
ContinuationIndentWidth: 4
|
||||||
|
Cpp11BracedListStyle: true
|
||||||
|
DeriveLineEnding: true
|
||||||
|
DerivePointerAlignment: false
|
||||||
|
DisableFormat: false
|
||||||
|
# EmptyLineAfterAccessModifier: Leave
|
||||||
|
EmptyLineBeforeAccessModifier: Always
|
||||||
|
ExperimentalAutoDetectBinPacking: false
|
||||||
|
FixNamespaceComments: true
|
||||||
|
ForEachMacros:
|
||||||
|
- foreach
|
||||||
|
- Q_FOREACH
|
||||||
|
- BOOST_FOREACH
|
||||||
|
IncludeBlocks: Regroup
|
||||||
|
IncludeCategories:
|
||||||
|
- Regex: '^<mach/'
|
||||||
|
Priority: 1
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
- Regex: '^<windows.h>'
|
||||||
|
Priority: 1
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
- Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
|
||||||
|
Priority: 1
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
- Regex: '^<([^\.])*>$'
|
||||||
|
Priority: 2
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
- Regex: '^<.*\.'
|
||||||
|
Priority: 3
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
- Regex: '.*'
|
||||||
|
Priority: 4
|
||||||
|
SortPriority: 0
|
||||||
|
CaseSensitive: false
|
||||||
|
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||||
|
IncludeIsMainSourceRegex: ''
|
||||||
|
# IndentAccessModifiers: false
|
||||||
|
IndentCaseBlocks: false
|
||||||
|
IndentCaseLabels: false
|
||||||
|
IndentExternBlock: NoIndent
|
||||||
|
IndentGotoLabels: false
|
||||||
|
IndentPPDirectives: AfterHash
|
||||||
|
IndentRequires: false
|
||||||
|
IndentWidth: 4
|
||||||
|
IndentWrappedFunctionNames: false
|
||||||
|
# InsertTrailingCommas: None
|
||||||
|
JavaScriptQuotes: Leave
|
||||||
|
JavaScriptWrapImports: true
|
||||||
|
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||||
|
MacroBlockBegin: ''
|
||||||
|
MacroBlockEnd: ''
|
||||||
|
MaxEmptyLinesToKeep: 1
|
||||||
|
NamespaceIndentation: None
|
||||||
|
NamespaceMacros:
|
||||||
|
ObjCBinPackProtocolList: Never
|
||||||
|
ObjCBlockIndentWidth: 2
|
||||||
|
ObjCBreakBeforeNestedBlockParam: true
|
||||||
|
ObjCSpaceAfterProperty: false
|
||||||
|
ObjCSpaceBeforeProtocolList: true
|
||||||
|
PenaltyBreakAssignment: 2
|
||||||
|
PenaltyBreakBeforeFirstCallParameter: 1
|
||||||
|
PenaltyBreakComment: 300
|
||||||
|
PenaltyBreakFirstLessLess: 120
|
||||||
|
PenaltyBreakString: 1000
|
||||||
|
PenaltyBreakTemplateDeclaration: 10
|
||||||
|
PenaltyExcessCharacter: 1000000
|
||||||
|
PenaltyReturnTypeOnItsOwnLine: 200
|
||||||
|
PenaltyIndentedWhitespace: 0
|
||||||
|
PointerAlignment: Left
|
||||||
|
RawStringFormats:
|
||||||
|
- Language: Cpp
|
||||||
|
Delimiters:
|
||||||
|
- cc
|
||||||
|
- CC
|
||||||
|
- cpp
|
||||||
|
- Cpp
|
||||||
|
- CPP
|
||||||
|
- 'c++'
|
||||||
|
- 'C++'
|
||||||
|
CanonicalDelimiter: ''
|
||||||
|
BasedOnStyle: google
|
||||||
|
- Language: TextProto
|
||||||
|
Delimiters:
|
||||||
|
- pb
|
||||||
|
- PB
|
||||||
|
- proto
|
||||||
|
- PROTO
|
||||||
|
EnclosingFunctions:
|
||||||
|
- EqualsProto
|
||||||
|
- EquivToProto
|
||||||
|
- PARSE_PARTIAL_TEXT_PROTO
|
||||||
|
- PARSE_TEST_PROTO
|
||||||
|
- PARSE_TEXT_PROTO
|
||||||
|
- ParseTextOrDie
|
||||||
|
- ParseTextProtoOrDie
|
||||||
|
- ParseTestProto
|
||||||
|
- ParsePartialTestProto
|
||||||
|
CanonicalDelimiter: ''
|
||||||
|
BasedOnStyle: google
|
||||||
|
ReflowComments: true
|
||||||
|
# ShortNamespaceLines: 5
|
||||||
|
SortIncludes: true
|
||||||
|
SortJavaStaticImport: Before
|
||||||
|
SortUsingDeclarations: true
|
||||||
|
SpaceAfterCStyleCast: false
|
||||||
|
SpaceAfterLogicalNot: false
|
||||||
|
SpaceAfterTemplateKeyword: false
|
||||||
|
SpaceAroundPointerQualifiers: Default
|
||||||
|
SpaceBeforeAssignmentOperators: true
|
||||||
|
SpaceBeforeCaseColon: false
|
||||||
|
SpaceBeforeCpp11BracedList: false
|
||||||
|
SpaceBeforeCtorInitializerColon: true
|
||||||
|
SpaceBeforeInheritanceColon: true
|
||||||
|
SpaceBeforeParens: ControlStatements
|
||||||
|
# (duplicate "SpaceAroundPointerQualifiers: Default" removed: the key is already set earlier in this file, and duplicated mapping keys are rejected by stricter YAML parsers)
|
||||||
|
SpaceBeforeRangeBasedForLoopColon: true
|
||||||
|
SpaceBeforeSquareBrackets: false
|
||||||
|
SpaceInEmptyBlock: false
|
||||||
|
SpaceInEmptyParentheses: false
|
||||||
|
SpacesBeforeTrailingComments: 2
|
||||||
|
SpacesInAngles: false
|
||||||
|
SpacesInConditionalStatement: false
|
||||||
|
SpacesInCStyleCastParentheses: false
|
||||||
|
# (duplicate "SpacesInConditionalStatement: false" removed: the key is already set earlier in this file, and duplicated mapping keys are rejected by stricter YAML parsers)
|
||||||
|
SpacesInContainerLiterals: false
|
||||||
|
# SpacesInLineCommentPrefix: -1
|
||||||
|
SpacesInParentheses: false
|
||||||
|
SpacesInSquareBrackets: false
|
||||||
|
Standard: Latest
|
||||||
|
StatementAttributeLikeMacros:
|
||||||
|
- Q_EMIT
|
||||||
|
StatementMacros:
|
||||||
|
- Q_UNUSED
|
||||||
|
- QT_REQUIRE_VERSION
|
||||||
|
TabWidth: 4
|
||||||
|
TypenameMacros:
|
||||||
|
UseCRLF: false
|
||||||
|
UseTab: Never
|
||||||
|
WhitespaceSensitiveMacros:
|
||||||
|
- STRINGIZE
|
||||||
|
- PP_STRINGIZE
|
||||||
|
- BOOST_PP_STRINGIZE
|
||||||
|
- NS_SWIFT_NAME
|
||||||
|
- CF_SWIFT_NAME
|
||||||
|
- FCODE
|
||||||
|
- ICODE
|
||||||
|
...
|
||||||
|
|
38
externals/dynarmic/externals/oaknut/.github/workflows/build-and-test.yml
vendored
Executable file
38
externals/dynarmic/externals/oaknut/.github/workflows/build-and-test.yml
vendored
Executable file
|
@ -0,0 +1,38 @@
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test_on_ubuntu:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
name: Build on ${{ matrix.distro }} ${{ matrix.arch }}
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- arch: aarch64
|
||||||
|
distro: ubuntu_latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- uses: uraimo/run-on-arch-action@v2
|
||||||
|
name: Build and Test
|
||||||
|
id: build
|
||||||
|
with:
|
||||||
|
arch: ${{ matrix.arch }}
|
||||||
|
distro: ${{ matrix.distro }}
|
||||||
|
shell: /bin/bash
|
||||||
|
|
||||||
|
install: |
|
||||||
|
apt-get update -q -y
|
||||||
|
apt-get install -q -y make cmake g++ git
|
||||||
|
|
||||||
|
pushd /tmp
|
||||||
|
git clone https://github.com/catchorg/Catch2.git
|
||||||
|
cd Catch2
|
||||||
|
cmake -Bbuild -H. -DBUILD_TESTING=OFF
|
||||||
|
cmake --build build/ --target install
|
||||||
|
popd
|
||||||
|
|
||||||
|
run: |
|
||||||
|
cmake -Bbuild -H.
|
||||||
|
cmake --build build
|
||||||
|
./build/oaknut-tests
|
4
externals/dynarmic/externals/oaknut/.gitignore
vendored
Executable file
4
externals/dynarmic/externals/oaknut/.gitignore
vendored
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
.DS_Store
|
||||||
|
a.out
|
||||||
|
work/
|
||||||
|
*build*/
|
57
externals/dynarmic/externals/oaknut/CMakeLists.txt
vendored
Executable file
57
externals/dynarmic/externals/oaknut/CMakeLists.txt
vendored
Executable file
|
@ -0,0 +1,57 @@
|
||||||
|
# The cxx_std_20 compile feature used below is only known to CMake 3.12 and newer.
cmake_minimum_required(VERSION 3.12)
project(oaknut LANGUAGES CXX VERSION 0.0.0)

# Determine if we're built as a subproject (using add_subdirectory)
# or if this is the master project.
set(MASTER_PROJECT OFF)
if ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}")
    set(MASTER_PROJECT ON)
endif()

# Disable in-source builds
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
    message(SEND_ERROR "In-source builds are not allowed.")
endif()

# Source project files
set(header_files
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_mnemonics.inc.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/fpsimd_mnemonics.inc.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/multi_typed_name.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/offset.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/string_literal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut.hpp
)

# Library definition (header-only, so everything is an INTERFACE requirement)
add_library(oaknut INTERFACE)
add_library(merry::oaknut ALIAS oaknut)
target_sources(oaknut INTERFACE "$<BUILD_INTERFACE:${header_files}>")
target_include_directories(oaknut INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_compile_features(oaknut INTERFACE cxx_std_20)

# Tests (only when oaknut is the top-level project)
if (MASTER_PROJECT)
    find_package(Catch2 3 REQUIRED)

    add_executable(oaknut-tests
        tests/basic.cpp
        tests/fpsimd.cpp
        tests/general.cpp
    )
    # Nothing links against the test executable, so its usage requirements stay PRIVATE.
    target_include_directories(oaknut-tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests)
    target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut)
    # These are GCC/Clang-style warning flags; do not hand them to MSVC's cl.
    target_compile_options(oaknut-tests PRIVATE
        "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall;-Wextra;-Wcast-qual;-pedantic;-pedantic-errors;-Wfatal-errors;-Wno-missing-braces>"
    )

    include(CTest)
    include(Catch)
    catch_discover_tests(oaknut-tests)
    enable_testing()
endif()
|
21
externals/dynarmic/externals/oaknut/LICENSE
vendored
Executable file
21
externals/dynarmic/externals/oaknut/LICENSE
vendored
Executable file
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
29
externals/dynarmic/externals/oaknut/README.md
vendored
Executable file
29
externals/dynarmic/externals/oaknut/README.md
vendored
Executable file
|
@ -0,0 +1,29 @@
|
||||||
|
# Oaknut
|
||||||
|
|
||||||
|
*A C++20 assembler for AArch64 (ARMv8.0)*
|
||||||
|
|
||||||
|
Oaknut is a header-only library that allows one to dynamically assemble code in-memory at runtime.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Simple example:
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
using EmittedFunction = int (*)();
|
||||||
|
|
||||||
|
EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value)
|
||||||
|
{
|
||||||
|
using namespace oaknut::util;
|
||||||
|
|
||||||
|
EmittedFunction result = code.ptr<EmittedFunction>();
|
||||||
|
|
||||||
|
code.MOVZ(W0, value);
|
||||||
|
code.RET();
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is [MIT licensed](LICENSE).
|
124
externals/dynarmic/externals/oaknut/include/oaknut/code_block.hpp
vendored
Executable file
124
externals/dynarmic/externals/oaknut/include/oaknut/code_block.hpp
vendored
Executable file
|
@ -0,0 +1,124 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once

#include <algorithm>  // std::min
#include <cstddef>
#include <cstdint>
#include <new>

#if defined(_WIN32)
#    include <windows.h>
#elif defined(__APPLE__)
#    include <libkern/OSCacheControl.h>
#    include <pthread.h>
#    include <sys/mman.h>
#    include <unistd.h>
#else
#    include <sys/mman.h>
#endif

namespace oaknut {

/// Owns a block of readable, writable and executable memory of a fixed size.
/// The block is neither copyable nor movable.
class CodeBlock {
public:
    /// Maps `size` bytes of read/write/execute memory.
    /// @throws std::bad_alloc if the platform allocation call fails.
    explicit CodeBlock(std::size_t size)
        : m_size(size)
    {
#if defined(_WIN32)
        m_memory = static_cast<std::uint32_t*>(VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE));
#else
#    if defined(__APPLE__)
        const int map_flags = MAP_ANON | MAP_PRIVATE | MAP_JIT;
#    else
        const int map_flags = MAP_ANON | MAP_PRIVATE;
#    endif
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, map_flags, -1, 0);
        // mmap signals failure with MAP_FAILED rather than a null pointer;
        // normalise it so the check below catches failures on every platform.
        m_memory = mapping == MAP_FAILED ? nullptr : static_cast<std::uint32_t*>(mapping);
#endif

        if (m_memory == nullptr)
            throw std::bad_alloc{};
    }

    /// Releases the mapping created by the constructor.
    ~CodeBlock()
    {
        if (m_memory == nullptr)
            return;

#if defined(_WIN32)
        VirtualFree((void*)m_memory, 0, MEM_RELEASE);
#else
        munmap(m_memory, m_size);
#endif
    }

    CodeBlock(const CodeBlock&) = delete;
    CodeBlock& operator=(const CodeBlock&) = delete;
    CodeBlock(CodeBlock&&) = delete;
    CodeBlock& operator=(CodeBlock&&) = delete;

    /// Returns the start of the block.
    std::uint32_t* ptr() const
    {
        return m_memory;
    }

    /// On Apple platforms calls pthread_jit_write_protect_np(1); does nothing elsewhere.
    void protect()
    {
#if defined(__APPLE__)
        pthread_jit_write_protect_np(1);
#endif
    }

    /// On Apple platforms calls pthread_jit_write_protect_np(0); does nothing elsewhere.
    void unprotect()
    {
#if defined(__APPLE__)
        pthread_jit_write_protect_np(0);
#endif
    }

    /// Performs cache maintenance for `size` bytes starting at `mem` so that
    /// newly written instructions become visible to instruction fetch.
    void invalidate(std::uint32_t* mem, std::size_t size)
    {
#if defined(__APPLE__)
        sys_icache_invalidate(mem, size);
#else
        // Smallest line sizes seen so far across calls; start from a large upper bound.
        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

        std::uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0"
                         : "=r"(ctr));

        // Bits [3:0] and [19:16] of CTR_EL0 are used as log2 line sizes in 4-byte words.
        const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf));
        const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf));

        const std::uintptr_t end = (std::uintptr_t)mem + size;

        // Clean each data cache line covering the range, starting from the line-aligned address.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
            __asm__ volatile("dc cvau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\n"
                         :
                         :
                         : "memory");

        // Invalidate each instruction cache line covering the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
            __asm__ volatile("ic ivau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\nisb\n"
                         :
                         :
                         : "memory");
#endif
    }

    /// Runs invalidate() over the whole block.
    void invalidate_all()
    {
        invalidate(m_memory, m_size);
    }

protected:
    std::uint32_t* m_memory;
    std::size_t m_size = 0;
};

}  // namespace oaknut
|
155
externals/dynarmic/externals/oaknut/include/oaknut/impl/arm64_encode_helpers.inc.hpp
vendored
Executable file
155
externals/dynarmic/externals/oaknut/include/oaknut/impl/arm64_encode_helpers.inc.hpp
vendored
Executable file
|
@ -0,0 +1,155 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
// Software "parallel bit deposit": the low-order bits of `val` are placed, in order,
// into the positions of the set bits of `mask_` (lowest set bit first). Bits of `val`
// beyond popcount(mask_) are ignored; result bits outside `mask_` are zero.
template<std::uint32_t mask_>
static constexpr std::uint32_t pdep(std::uint32_t val)
{
    std::uint32_t deposited = 0;
    std::uint32_t source_bit = 1;

    // Each iteration consumes the lowest remaining mask bit and the next bit of `val`.
    for (std::uint32_t remaining = mask_; remaining != 0; remaining &= remaining - 1) {
        const std::uint32_t lowest_set = remaining & (~remaining + 1);
        if ((val & source_bit) != 0)
            deposited |= lowest_set;
        source_bit <<= 1;
    }

    return deposited;
}
|
||||||
|
|
||||||
|
// Stamps out an `encode<splat>(TYPE v)` overload.
//   TYPE   - operand type accepted by the overload
//   ACCESS - expression (in terms of `v`) yielding the raw bits to deposit
//   SIZE   - exact number of bits `splat` must select; checked at compile time
// The raw bits are scattered into the positions selected by `splat` via pdep.
#define OAKNUT_STD_ENCODE(TYPE, ACCESS, SIZE)                       \
    template<std::uint32_t splat>                                   \
    std::uint32_t encode(TYPE v)                                    \
    {                                                               \
        static_assert(std::popcount(splat) == SIZE);                \
        return pdep<splat>(static_cast<std::uint32_t>(ACCESS));     \
    }

// Register operands: only the low five bits of the index are encoded.
OAKNUT_STD_ENCODE(RReg, v.index() & 31, 5)
OAKNUT_STD_ENCODE(VReg, v.index() & 31, 5)
OAKNUT_STD_ENCODE(VRegArranged, v.index() & 31, 5)

// Immediate wrapper types: their pre-computed m_encoded field is deposited as-is.
OAKNUT_STD_ENCODE(AddSubImm, v.m_encoded, 13)
OAKNUT_STD_ENCODE(BitImm32, v.m_encoded, 12)
OAKNUT_STD_ENCODE(BitImm64, v.m_encoded, 13)
OAKNUT_STD_ENCODE(LslShift<32>, v.m_encoded, 12)
OAKNUT_STD_ENCODE(LslShift<64>, v.m_encoded, 12)
OAKNUT_STD_ENCODE(FImm8, v.m_encoded, 8)
OAKNUT_STD_ENCODE(RepImm, v.m_encoded, 8)

// Enumerations: the enumerator's numeric value is the encoding.
OAKNUT_STD_ENCODE(Cond, v, 4)
OAKNUT_STD_ENCODE(AddSubExt, v, 3)
OAKNUT_STD_ENCODE(IndexExt, v, 3)
OAKNUT_STD_ENCODE(AddSubShift, v, 2)
OAKNUT_STD_ENCODE(LogShift, v, 2)
OAKNUT_STD_ENCODE(PstateField, v, 6)
OAKNUT_STD_ENCODE(SystemReg, v, 15)
OAKNUT_STD_ENCODE(AtOp, v, 7)
OAKNUT_STD_ENCODE(BarrierOp, v, 4)
OAKNUT_STD_ENCODE(DcOp, v, 10)
OAKNUT_STD_ENCODE(IcOp, v, 10)
OAKNUT_STD_ENCODE(PrfOp, v, 5)
OAKNUT_STD_ENCODE(TlbiOp, v, 10)
|
||||||
|
|
||||||
|
template<std::uint32_t splat>
|
||||||
|
std::uint32_t encode(MovImm16 v)
|
||||||
|
{
|
||||||
|
static_assert(std::popcount(splat) == 17 || std::popcount(splat) == 18);
|
||||||
|
if constexpr (std::popcount(splat) == 17) {
|
||||||
|
constexpr std::uint32_t mask = (1 << std::popcount(splat)) - 1;
|
||||||
|
if ((v.m_encoded & mask) != v.m_encoded)
|
||||||
|
throw "invalid MovImm16";
|
||||||
|
}
|
||||||
|
return pdep<splat>(v.m_encoded);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<std::uint32_t splat, std::size_t imm_size>
|
||||||
|
std::uint32_t encode(Imm<imm_size> v)
|
||||||
|
{
|
||||||
|
static_assert(std::popcount(splat) >= imm_size);
|
||||||
|
return pdep<splat>(v.value());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<std::uint32_t splat, int A, int B>
|
||||||
|
std::uint32_t encode(ImmChoice<A, B> v)
|
||||||
|
{
|
||||||
|
static_assert(std::popcount(splat) == 1);
|
||||||
|
return pdep<splat>(v.m_encoded);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<std::uint32_t splat, int A, int B, int C, int D>
|
||||||
|
std::uint32_t encode(ImmChoice<A, B, C, D> v)
|
||||||
|
{
|
||||||
|
static_assert(std::popcount(splat) == 2);
|
||||||
|
return pdep<splat>(v.m_encoded);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<std::uint32_t splat, std::size_t size, std::size_t align>
|
||||||
|
std::uint32_t encode(SOffset<size, align> v)
|
||||||
|
{
|
||||||
|
static_assert(std::popcount(splat) == size - align);
|
||||||
|
return pdep<splat>(v.m_encoded);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<std::uint32_t splat, std::size_t size, std::size_t align>
|
||||||
|
std::uint32_t encode(POffset<size, align> v)
|
||||||
|
{
|
||||||
|
static_assert(std::popcount(splat) == size - align);
|
||||||
|
return pdep<splat>(v.m_encoded);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Raw integer operand: deposited directly, with no check on how many bits `splat` selects.
template<std::uint32_t splat>
std::uint32_t encode(std::uint32_t v)
{
    return pdep<splat>(v);
}
|
||||||
|
|
||||||
|
template<std::uint32_t splat, typename T, size_t N>
|
||||||
|
std::uint32_t encode(List<T, N> v)
|
||||||
|
{
|
||||||
|
return encode<splat>(v.m_base);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef OAKNUT_STD_ENCODE
|
||||||
|
|
||||||
|
void addsubext_lsl_correction(AddSubExt& ext, XRegSp)
|
||||||
|
{
|
||||||
|
if (ext == AddSubExt::LSL)
|
||||||
|
ext = AddSubExt::UXTX;
|
||||||
|
}
|
||||||
|
void addsubext_lsl_correction(AddSubExt& ext, WRegWsp)
|
||||||
|
{
|
||||||
|
if (ext == AddSubExt::LSL)
|
||||||
|
ext = AddSubExt::UXTW;
|
||||||
|
}
|
||||||
|
void addsubext_lsl_correction(AddSubExt& ext, XReg)
|
||||||
|
{
|
||||||
|
if (ext == AddSubExt::LSL)
|
||||||
|
ext = AddSubExt::UXTX;
|
||||||
|
}
|
||||||
|
void addsubext_lsl_correction(AddSubExt& ext, WReg)
|
||||||
|
{
|
||||||
|
if (ext == AddSubExt::LSL)
|
||||||
|
ext = AddSubExt::UXTW;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks that the extension kind agrees with the width of `rm`.
// Extensions whose low two bits are both set (UXTX, SXTX) require a 64-bit register;
// every other extension requires a 32-bit register. Throws a string literal otherwise.
void addsubext_verify_reg_size(AddSubExt ext, RReg rm)
{
    const bool wants_64bit = (static_cast<int>(ext) & 0b011) == 0b011;

    const bool ok_32 = rm.bitsize() == 32 && !wants_64bit;
    const bool ok_64 = rm.bitsize() == 64 && wants_64bit;
    if (ok_32 || ok_64)
        return;

    throw "invalid AddSubExt choice for rm size";
}
|
||||||
|
|
||||||
|
// Checks that the index-extension kind agrees with the width of `rm`.
// Bit 0 of the extension value selects the width: clear requires a 32-bit register,
// set requires a 64-bit register. Throws a string literal otherwise.
void indexext_verify_reg_size(IndexExt ext, RReg rm)
{
    const bool wants_64bit = (static_cast<int>(ext) & 1) == 1;

    const bool ok_32 = rm.bitsize() == 32 && !wants_64bit;
    const bool ok_64 = rm.bitsize() == 64 && wants_64bit;
    if (ok_32 || ok_64)
        return;

    throw "invalid IndexExt choice for rm size";
}
|
||||||
|
|
||||||
|
// Rejects a bit index of 32 or more when `rt` is a 32-bit register.
// Registers of any other width are accepted with any 6-bit index.
void tbz_verify_reg_size(RReg rt, Imm<6> imm)
{
    if (rt.bitsize() != 32)
        return;
    if (imm.value() < 32)
        return;

    throw "invalid imm choice for rt size";
}
|
1709
externals/dynarmic/externals/oaknut/include/oaknut/impl/arm64_mnemonics.inc.hpp
vendored
Executable file
1709
externals/dynarmic/externals/oaknut/include/oaknut/impl/arm64_mnemonics.inc.hpp
vendored
Executable file
File diff suppressed because it is too large
Load diff
242
externals/dynarmic/externals/oaknut/include/oaknut/impl/enum.hpp
vendored
Executable file
242
externals/dynarmic/externals/oaknut/include/oaknut/impl/enum.hpp
vendored
Executable file
|
@ -0,0 +1,242 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace oaknut {
|
||||||
|
|
||||||
|
// Empty tag type; carries no data and exists only to be distinguished by type.
struct PostIndexed {};

// Empty tag type; carries no data and exists only to be distinguished by type.
struct PreIndexed {};

// Single-enumerator type naming the LSL keyword.
enum class LslSymbol {
    LSL,
};

// Single-enumerator type naming the MSL keyword.
enum class MslSymbol {
    MSL,
};
|
||||||
|
|
||||||
|
// Condition codes. The numeric value of each enumerator is its 4-bit encoding;
// HS and LO are alternative spellings of CS and CC.
enum class Cond {
    EQ = 0b0000,
    NE = 0b0001,
    CS = 0b0010,
    CC = 0b0011,
    MI = 0b0100,
    PL = 0b0101,
    VS = 0b0110,
    VC = 0b0111,
    HI = 0b1000,
    LS = 0b1001,
    GE = 0b1010,
    LT = 0b1011,
    GT = 0b1100,
    LE = 0b1101,
    AL = 0b1110,
    NV = 0b1111,
    HS = CS,
    LO = CC,
};

// Returns the condition whose encoding differs from `c` only in bit 0
// (EQ <-> NE, CS <-> CC, ..., AL <-> NV).
constexpr Cond invert(Cond c)
{
    const unsigned raw = static_cast<unsigned>(c);
    const unsigned flipped = raw ^ 1u;
    return static_cast<Cond>(flipped);
}
|
||||||
|
|
||||||
|
// Extension kinds for add/sub extended-register operands.
// UXTB..SXTX take the values 0..7 and are encoded as a 3-bit field.
// LSL (value 8) has no encoding of its own: addsubext_lsl_correction rewrites it to
// UXTW or UXTX depending on the register type.
enum class AddSubExt {
    UXTB,
    UXTH,
    UXTW,
    UXTX,
    SXTB,
    SXTH,
    SXTW,
    SXTX,
    LSL,  // UXTW (32-bit) or UXTX (64-bit)
};
|
||||||
|
|
||||||
|
// Extension kinds for a register index operand; the value is the 3-bit encoding.
// Bit 0 is what indexext_verify_reg_size inspects: clear pairs with a 32-bit index
// register, set pairs with a 64-bit index register.
enum class IndexExt {
    UXTW = 0b010,
    LSL = 0b011,
    SXTW = 0b110,
    SXTX = 0b111,
};
|
||||||
|
|
||||||
|
// Shift kinds accepted by add/sub shifted-register forms (values 0..2, 2-bit field).
// Unlike LogShift, there is no ROR enumerator.
enum class AddSubShift {
    LSL,
    LSR,
    ASR,
};
|
||||||
|
|
||||||
|
// Shift kinds accepted by logical shifted-register forms (values 0..3, 2-bit field).
enum class LogShift {
    LSL,
    LSR,
    ASR,
    ROR,
};
|
||||||
|
|
||||||
|
// PSTATE field selectors, encoded as a 6-bit value written as two 3-bit groups.
// NOTE(review): the groups look like op1'op2 of MSR (immediate) — confirm against the Arm ARM.
enum class PstateField {
    UAO = 0b000'011,  // ARMv8.2-UAO
    PAN = 0b000'100,  // ARMv8.1-PAN
    SPSel = 0b000'101,
    DIT = 0b011'010,  // ARMv8.4-DIT
    DAIFSet = 0b011'110,
    DAIFClr = 0b011'111,
};
|
||||||
|
|
||||||
|
// System register selector. No enumerators are defined here; the encode helpers
// treat the underlying value as a 15-bit field, so callers must supply values by cast.
enum class SystemReg {
};
|
||||||
|
|
||||||
|
// Address-translation operation selectors, encoded as a 7-bit value written in
// 3'1'3 digit groups.
// NOTE(review): the groups look like op1'CRm<0>'op2 of the AT instruction — confirm.
enum class AtOp {
    S1E1R = 0b000'0'000,
    S1E1W = 0b000'0'001,
    S1E0R = 0b000'0'010,
    S1E0W = 0b000'0'011,
    S1E1RP = 0b000'1'000,  // ARMv8.2-ATS1E1
    S1E1WP = 0b000'1'001,  // ARMv8.2-ATS1E1
    S1E2R = 0b100'0'000,
    S1E2W = 0b100'0'001,
    S12E1R = 0b100'0'100,
    S12E1W = 0b100'0'101,
    S12E0R = 0b100'0'110,
    S12E0W = 0b100'0'111,
    S1E3R = 0b110'0'000,
    S1E3W = 0b110'0'001,
};
|
||||||
|
|
||||||
|
// Barrier option selectors; the value is the 4-bit encoding.
// Within each group of three, the plain / ST / LD variants end in 0b11 / 0b10 / 0b01.
enum class BarrierOp {
    SY = 0b1111,
    ST = 0b1110,
    LD = 0b1101,
    ISH = 0b1011,
    ISHST = 0b1010,
    ISHLD = 0b1001,
    NSH = 0b0111,
    NSHST = 0b0110,
    NSHLD = 0b0101,
    OSH = 0b0011,
    OSHST = 0b0010,
    OSHLD = 0b0001,
};
|
||||||
|
|
||||||
|
// Data-cache maintenance operation selectors, encoded as a 10-bit value written in
// 3'4'3 digit groups.
// NOTE(review): the groups look like op1'CRm'op2 of the DC instruction — confirm.
enum class DcOp {
    IVAC = 0b000'0110'001,
    ISW = 0b000'0110'010,
    CSW = 0b000'1010'010,
    CISW = 0b000'1110'010,
    ZVA = 0b011'0100'001,
    CVAC = 0b011'1010'001,
    CVAU = 0b011'1011'001,
    CVAP = 0b011'1100'001,  // ARMv8.2-DCPoP
    CIVAC = 0b011'1110'001,
};
|
||||||
|
|
||||||
|
// Instruction-cache maintenance operation selectors, encoded as a 10-bit value
// written in the same 3'4'3 digit grouping as DcOp.
enum class IcOp {
    IALLUIS = 0b000'0001'000,
    IALLU = 0b000'0101'000,
    IVAU = 0b011'0101'001,
};
|
||||||
|
|
||||||
|
// Prefetch operation selectors, encoded as a 5-bit value written in 2'2'1 digit groups.
// Reading the enumerator names against the values: the first group tracks the
// PLD / PLI / PST prefix, the second the L1 / L2 / L3 part, and the last bit KEEP vs STRM.
enum class PrfOp {
    PLDL1KEEP = 0b00'00'0,
    PLDL1STRM = 0b00'00'1,
    PLDL2KEEP = 0b00'01'0,
    PLDL2STRM = 0b00'01'1,
    PLDL3KEEP = 0b00'10'0,
    PLDL3STRM = 0b00'10'1,
    PLIL1KEEP = 0b01'00'0,
    PLIL1STRM = 0b01'00'1,
    PLIL2KEEP = 0b01'01'0,
    PLIL2STRM = 0b01'01'1,
    PLIL3KEEP = 0b01'10'0,
    PLIL3STRM = 0b01'10'1,
    PSTL1KEEP = 0b10'00'0,
    PSTL1STRM = 0b10'00'1,
    PSTL2KEEP = 0b10'01'0,
    PSTL2STRM = 0b10'01'1,
    PSTL3KEEP = 0b10'10'0,
    PSTL3STRM = 0b10'10'1,
};
|
||||||
|
|
||||||
|
// TLB-invalidate operation selectors, encoded as a 10-bit value written in 3'4'3
// digit groups (the same grouping used by DcOp and IcOp).
// Entries are listed in ascending numeric order; the trailing comments name the
// architecture extension that introduced an operation.
enum class TlbiOp {
    VMALLE1OS = 0b000'0001'000,     // ARMv8.4-TLBI
    VAE1OS = 0b000'0001'001,        // ARMv8.4-TLBI
    ASIDE1OS = 0b000'0001'010,      // ARMv8.4-TLBI
    VAAE1OS = 0b000'0001'011,       // ARMv8.4-TLBI
    VALE1OS = 0b000'0001'101,       // ARMv8.4-TLBI
    VAALE1OS = 0b000'0001'111,      // ARMv8.4-TLBI
    RVAE1IS = 0b000'0010'001,       // ARMv8.4-TLBI
    RVAAE1IS = 0b000'0010'011,      // ARMv8.4-TLBI
    RVALE1IS = 0b000'0010'101,      // ARMv8.4-TLBI
    RVAALE1IS = 0b000'0010'111,     // ARMv8.4-TLBI
    VMALLE1IS = 0b000'0011'000,
    VAE1IS = 0b000'0011'001,
    ASIDE1IS = 0b000'0011'010,
    VAAE1IS = 0b000'0011'011,
    VALE1IS = 0b000'0011'101,
    VAALE1IS = 0b000'0011'111,
    RVAE1OS = 0b000'0101'001,       // ARMv8.4-TLBI
    RVAAE1OS = 0b000'0101'011,      // ARMv8.4-TLBI
    RVALE1OS = 0b000'0101'101,      // ARMv8.4-TLBI
    RVAALE1OS = 0b000'0101'111,     // ARMv8.4-TLBI
    RVAE1 = 0b000'0110'001,         // ARMv8.4-TLBI
    RVAAE1 = 0b000'0110'011,        // ARMv8.4-TLBI
    RVALE1 = 0b000'0110'101,        // ARMv8.4-TLBI
    RVAALE1 = 0b000'0110'111,       // ARMv8.4-TLBI
    VMALLE1 = 0b000'0111'000,
    VAE1 = 0b000'0111'001,
    ASIDE1 = 0b000'0111'010,
    VAAE1 = 0b000'0111'011,
    VALE1 = 0b000'0111'101,
    VAALE1 = 0b000'0111'111,
    IPAS2E1IS = 0b100'0000'001,
    RIPAS2E1IS = 0b100'0000'010,    // ARMv8.4-TLBI
    IPAS2LE1IS = 0b100'0000'101,
    RIPAS2LE1IS = 0b100'0000'110,   // ARMv8.4-TLBI
    ALLE2OS = 0b100'0001'000,       // ARMv8.4-TLBI
    VAE2OS = 0b100'0001'001,        // ARMv8.4-TLBI
    ALLE1OS = 0b100'0001'100,       // ARMv8.4-TLBI
    VALE2OS = 0b100'0001'101,       // ARMv8.4-TLBI
    VMALLS12E1OS = 0b100'0001'110,  // ARMv8.4-TLBI
    RVAE2IS = 0b100'0010'001,       // ARMv8.4-TLBI
    RVALE2IS = 0b100'0010'101,      // ARMv8.4-TLBI
    ALLE2IS = 0b100'0011'000,
    VAE2IS = 0b100'0011'001,
    ALLE1IS = 0b100'0011'100,
    VALE2IS = 0b100'0011'101,
    VMALLS12E1IS = 0b100'0011'110,
    IPAS2E1OS = 0b100'0100'000,     // ARMv8.4-TLBI
    IPAS2E1 = 0b100'0100'001,
    RIPAS2E1 = 0b100'0100'010,      // ARMv8.4-TLBI
    RIPAS2E1OS = 0b100'0100'011,    // ARMv8.4-TLBI
    IPAS2LE1OS = 0b100'0100'100,    // ARMv8.4-TLBI
    IPAS2LE1 = 0b100'0100'101,
    RIPAS2LE1 = 0b100'0100'110,     // ARMv8.4-TLBI
    RIPAS2LE1OS = 0b100'0100'111,   // ARMv8.4-TLBI
    RVAE2OS = 0b100'0101'001,       // ARMv8.4-TLBI
    RVALE2OS = 0b100'0101'101,      // ARMv8.4-TLBI
    RVAE2 = 0b100'0110'001,         // ARMv8.4-TLBI
    RVALE2 = 0b100'0110'101,        // ARMv8.4-TLBI
    ALLE2 = 0b100'0111'000,
    VAE2 = 0b100'0111'001,
    ALLE1 = 0b100'0111'100,
    VALE2 = 0b100'0111'101,
    VMALLS12E1 = 0b100'0111'110,
    ALLE3OS = 0b110'0001'000,       // ARMv8.4-TLBI
    VAE3OS = 0b110'0001'001,        // ARMv8.4-TLBI
    VALE3OS = 0b110'0001'101,       // ARMv8.4-TLBI
    RVAE3IS = 0b110'0010'001,       // ARMv8.4-TLBI
    RVALE3IS = 0b110'0010'101,      // ARMv8.4-TLBI
    ALLE3IS = 0b110'0011'000,
    VAE3IS = 0b110'0011'001,
    VALE3IS = 0b110'0011'101,
    RVAE3OS = 0b110'0101'001,       // ARMv8.4-TLBI
    RVALE3OS = 0b110'0101'101,      // ARMv8.4-TLBI
    RVAE3 = 0b110'0110'001,         // ARMv8.4-TLBI
    RVALE3 = 0b110'0110'101,        // ARMv8.4-TLBI
    ALLE3 = 0b110'0111'000,
    VAE3 = 0b110'0111'001,
    VALE3 = 0b110'0111'101,
};
|
||||||
|
|
||||||
|
} // namespace oaknut
|
9163
externals/dynarmic/externals/oaknut/include/oaknut/impl/fpsimd_mnemonics.inc.hpp
vendored
Executable file
9163
externals/dynarmic/externals/oaknut/include/oaknut/impl/fpsimd_mnemonics.inc.hpp
vendored
Executable file
File diff suppressed because it is too large
Load diff
317
externals/dynarmic/externals/oaknut/include/oaknut/impl/imm.hpp
vendored
Executable file
317
externals/dynarmic/externals/oaknut/include/oaknut/impl/imm.hpp
vendored
Executable file
|
@ -0,0 +1,317 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <bit>
|
||||||
|
#include <compare>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
|
namespace oaknut {
|
||||||
|
|
||||||
|
template<std::size_t bit_size_>
|
||||||
|
struct Imm {
|
||||||
|
public:
|
||||||
|
static_assert(bit_size_ != 0 && bit_size_ <= 32, "Invalid bit_size");
|
||||||
|
static constexpr std::size_t bit_size = bit_size_;
|
||||||
|
static constexpr std::uint32_t mask = (1 << bit_size) - 1;
|
||||||
|
|
||||||
|
constexpr /* implicit */ Imm(std::uint32_t value_)
|
||||||
|
: m_value(value_)
|
||||||
|
{
|
||||||
|
if (!is_valid(value_))
|
||||||
|
throw "outsized Imm value";
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr auto operator<=>(const Imm& other) const { return m_value <=> other.m_value; }
|
||||||
|
constexpr auto operator<=>(std::uint32_t other) const { return operator<=>(Imm{other}); }
|
||||||
|
|
||||||
|
constexpr std::uint32_t value() const { return m_value; }
|
||||||
|
|
||||||
|
static bool is_valid(std::uint32_t value_)
|
||||||
|
{
|
||||||
|
return ((value_ & mask) == value_);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
std::uint32_t m_value;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Selects whether a 12-bit add/sub immediate is used as-is or shifted left by 12.
enum class AddSubImmShift {
    SHL_0,
    SHL_12,
};

// A 12-bit add/sub immediate with an optional left shift by 12.
// Stored as a 13-bit value: bits [11:0] hold the immediate, bit 12 is set for SHL_12.
struct AddSubImm {
public:
    // Explicit form. Throws "invalid AddSubImm" if `value_` needs more than 12 bits.
    constexpr AddSubImm(std::uint32_t value_, AddSubImmShift shift_)
        : m_encoded(value_)
    {
        if (value_ > 0xFFF)
            throw "invalid AddSubImm";
        if (shift_ == AddSubImmShift::SHL_12)
            m_encoded |= std::uint32_t{1} << 12;
    }

    // Implicit form: picks the unshifted encoding when the value fits in bits [11:0],
    // otherwise the shifted encoding when it fits in bits [23:12]; throws otherwise.
    constexpr /* implicit */ AddSubImm(std::uint64_t value_)
        : m_encoded(0)
    {
        const bool fits_unshifted = (value_ >> 12) == 0;
        const bool fits_shifted = (value_ & ~std::uint64_t{0xFFF000}) == 0;

        if (fits_unshifted) {
            m_encoded = static_cast<std::uint32_t>(value_);
        } else if (fits_shifted) {
            m_encoded = static_cast<std::uint32_t>(value_ >> 12) | (std::uint32_t{1} << 12);
        } else {
            throw "invalid AddSubImm";
        }
    }

    // True when the implicit constructor would accept `value_`.
    static constexpr bool is_valid(std::uint64_t value_)
    {
        const bool fits_unshifted = (value_ >> 12) == 0;
        const bool fits_shifted = (value_ & ~std::uint64_t{0xFFF000}) == 0;
        return fits_unshifted || fits_shifted;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
|
||||||
|
|
||||||
|
// Selects which 16-bit lane of the destination a MovImm16 occupies.
// The enumerator value (0..3) is stored directly above the 16-bit payload.
enum class MovImm16Shift {
    SHL_0,
    SHL_16,
    SHL_32,
    SHL_48,
};

// A 16-bit move immediate plus lane selector.
// Stored as an 18-bit value: bits [15:0] hold the payload, bits [17:16] the lane index.
struct MovImm16 {
public:
    // Explicit form; every (value, shift) pair is representable, so nothing is checked.
    // constexpr for consistency with the implicit constructor and the other immediate types.
    constexpr MovImm16(std::uint16_t value_, MovImm16Shift shift_)
        : m_encoded(static_cast<std::uint32_t>(value_) | (static_cast<std::uint32_t>(shift_) << 16))
    {}

    // Implicit form: accepts a 64-bit value that has at most one non-zero 16-bit lane
    // and derives the lane index from its position. Zero leaves m_encoded at its
    // default of 0. Throws "invalid MovImm16" when more than one lane is non-zero.
    constexpr /* implicit */ MovImm16(std::uint64_t value_)
    {
        std::uint32_t shift = 0;
        while (value_ != 0) {
            const std::uint32_t lsw = static_cast<std::uint16_t>(value_ & 0xFFFF);
            if (value_ == lsw) {
                // Only the lowest remaining lane is populated: done.
                m_encoded = lsw | (shift << 16);
                return;
            } else if (lsw != 0) {
                // This lane and at least one higher lane are both non-zero.
                throw "invalid MovImm16";
            }
            value_ >>= 16;
            shift++;
        }
    }

    // True when the implicit constructor would accept `value_`.
    static constexpr bool is_valid(std::uint64_t value_)
    {
        return ((value_ & 0xFFFF) == value_) || ((value_ & 0xFFFF0000) == value_) || ((value_ & 0xFFFF00000000) == value_) || ((value_ & 0xFFFF000000000000) == value_);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded = 0;
};
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint64_t value)
|
||||||
|
{
|
||||||
|
if (value == 0 || (~value) == 0)
|
||||||
|
return std::nullopt;
|
||||||
|
|
||||||
|
const std::size_t rotation = std::countr_zero(value & (value + 1));
|
||||||
|
const std::uint64_t rot_value = std::rotr(value, rotation);
|
||||||
|
|
||||||
|
const std::size_t esize = std::countr_zero(rot_value & (rot_value + 1));
|
||||||
|
const std::size_t ones = std::countr_one(rot_value);
|
||||||
|
|
||||||
|
if (std::rotr(value, esize) != value)
|
||||||
|
return std::nullopt;
|
||||||
|
|
||||||
|
const std::uint32_t S = ((-esize) << 1) | (ones - 1);
|
||||||
|
const std::uint32_t R = (esize - rotation) & (esize - 1);
|
||||||
|
const std::uint32_t N = (~S >> 6) & 1;
|
||||||
|
|
||||||
|
return static_cast<std::uint32_t>((S & 0b111111) | (R << 6) | (N << 12));
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint32_t value)
|
||||||
|
{
|
||||||
|
const std::uint64_t value_u64 = (static_cast<std::uint64_t>(value) << 32) | static_cast<std::uint64_t>(value);
|
||||||
|
const auto result = encode_bit_imm(value_u64);
|
||||||
|
if (result && (*result & 0b0'111111'111111) != *result)
|
||||||
|
return std::nullopt;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
|
// A 32-bit bitmask immediate, held as a 12-bit packed value.
struct BitImm32 {
public:
    // Builds the packed value from two raw 6-bit fields.
    // NOTE(review): this places `imms` in bits [11:6] and `immr` in bits [5:0], whereas
    // the value produced by detail::encode_bit_imm (used by the implicit constructor
    // below) places its size field in bits [5:0] and its rotation field in bits [11:6].
    // One of the two layouts, or the parameter naming here, looks swapped — confirm
    // against the instruction emitters before relying on this constructor.
    constexpr BitImm32(Imm<6> imms, Imm<6> immr)
        : m_encoded((imms.value() << 6) | immr.value())
    {}

    // Encodes an arbitrary 32-bit mask; throws "invalid BitImm32" when the value has
    // no encoding or the encoding would need bit 12 set.
    constexpr /* implicit */ BitImm32(std::uint32_t value)
    {
        const auto encoded = detail::encode_bit_imm(value);
        if (!encoded || (*encoded & 0x1000) != 0)
            throw "invalid BitImm32";
        m_encoded = *encoded;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
|
||||||
|
|
||||||
|
// A 64-bit bitmask immediate, held as a 13-bit packed value (bit 12 is the N flag).
struct BitImm64 {
public:
    // Builds the packed value from the N flag and two raw 6-bit fields.
    // NOTE(review): this places `imms` in bits [11:6] and `immr` in bits [5:0], whereas
    // the value produced by detail::encode_bit_imm (used by the implicit constructor
    // below) places its size field in bits [5:0] and its rotation field in bits [11:6].
    // One of the two layouts, or the parameter naming here, looks swapped — confirm
    // against the instruction emitters before relying on this constructor.
    constexpr BitImm64(bool N, Imm<6> imms, Imm<6> immr)
        : m_encoded((N ? 1 << 12 : 0) | (imms.value() << 6) | immr.value())
    {}

    // Encodes an arbitrary 64-bit mask; throws "invalid BitImm64" when the value has
    // no encoding.
    constexpr /* implicit */ BitImm64(std::uint64_t value)
    {
        const auto encoded = detail::encode_bit_imm(value);
        if (!encoded)
            throw "invalid BitImm64";
        m_encoded = *encoded;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
|
||||||
|
|
||||||
|
struct FImm8 {
|
||||||
|
public:
|
||||||
|
constexpr explicit FImm8(std::uint8_t encoded)
|
||||||
|
: m_encoded(encoded)
|
||||||
|
{}
|
||||||
|
|
||||||
|
constexpr FImm8(bool sign, Imm<3> exp, Imm<4> mantissa)
|
||||||
|
: m_encoded((sign ? 1 << 7 : 0) | (exp.value() << 4) | (mantissa.value()))
|
||||||
|
{}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
std::uint32_t m_encoded;
|
||||||
|
};
|
||||||
|
|
||||||
|
// An 8-bit immediate supplied in already-encoded form; no validation is performed.
struct RepImm {
public:
    constexpr explicit RepImm(std::uint8_t encoded)
        : m_encoded(encoded)
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
|
||||||
|
|
||||||
|
// An operand that must equal the compile-time constant `A`.
// Carries no state; construction from any other value throws "invalid ImmConst".
template<int A>
struct ImmConst {
    constexpr /* implicit */ ImmConst(int value)
    {
        if (value == A)
            return;
        throw "invalid ImmConst";
    }
};
|
||||||
|
|
||||||
|
// An operand that must compare equal to floating-point zero.
// Carries no state; any value for which `value != 0` holds (including NaN) throws
// "invalid ImmConstFZero".
struct ImmConstFZero {
    constexpr /* implicit */ ImmConstFZero(double value)
    {
        const bool is_zero = (value == 0);
        if (!is_zero) {
            throw "invalid ImmConstFZero";
        }
    }
};
|
||||||
|
|
||||||
|
// An operand restricted to a fixed list of integer alternatives; the stored
// encoding is the zero-based position of the matching alternative.
template<int...>
struct ImmChoice;

// Two alternatives: A -> 0, B -> 1. Anything else throws "invalid ImmChoice".
template<int A, int B>
struct ImmChoice<A, B> {
    constexpr /* implicit */ ImmChoice(int value)
        : m_encoded(value == A ? 0u
                               : (value == B ? 1u
                                             : throw "invalid ImmChoice"))
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};

// Four alternatives: A -> 0, B -> 1, C -> 2, D -> 3. Anything else throws
// "invalid ImmChoice". Alternatives are tested in order, so the first match wins.
template<int A, int B, int C, int D>
struct ImmChoice<A, B, C, D> {
    constexpr /* implicit */ ImmChoice(int value)
        : m_encoded(value == A ? 0u
                               : (value == B ? 1u
                                             : (value == C ? 2u
                                                           : (value == D ? 3u
                                                                         : throw "invalid ImmChoice"))))
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
|
||||||
|
|
||||||
|
// An unsigned operand restricted to the closed interval [Start, End].
// Construction outside the interval throws "invalid ImmRange".
template<unsigned Start, unsigned End>
struct ImmRange {
    constexpr /* implicit */ ImmRange(unsigned value_)
        : m_value(value_)
    {
        const bool in_range = (Start <= value_) && (value_ <= End);
        if (!in_range) {
            throw "invalid ImmRange";
        }
    }

    // The validated value, unchanged.
    constexpr unsigned value() const { return m_value; }

private:
    unsigned m_value;
};
|
||||||
|
|
||||||
|
// A left-shift amount in [0, max_value), stored pre-packed as two 6-bit fields:
//   bits [11:6] = (-amount) modulo max_value
//   bits [5:0]  = max_value - 1 - amount
// NOTE(review): this looks like the immr/imms pair of a bitfield-move alias — confirm.
// The modulo is computed with a mask, so max_value is assumed to be a power of two.
template<std::size_t max_value>
struct LslShift {
    constexpr /* implicit */ LslShift(std::size_t amount)
        : m_encoded(0)
    {
        if (amount >= max_value)
            throw "LslShift out of range";

        const std::size_t high_field = (0 - amount) & (max_value - 1);
        const std::size_t low_field = max_value - 1 - amount;
        m_encoded = static_cast<std::uint32_t>((high_field << 6) | low_field);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
|
||||||
|
|
||||||
|
} // namespace oaknut
|
80
externals/dynarmic/externals/oaknut/include/oaknut/impl/list.hpp
vendored
Executable file
80
externals/dynarmic/externals/oaknut/include/oaknut/impl/list.hpp
vendored
Executable file
|
@ -0,0 +1,80 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <tuple>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
namespace oaknut {
|
||||||
|
|
||||||
|
struct Elem;
|
||||||
|
template<typename>
|
||||||
|
struct ElemSelector;
|
||||||
|
struct VRegArranged;
|
||||||
|
|
||||||
|
namespace detail {

// Type trait: true only for types of the form ElemSelector<E>.
template<typename>
struct is_instance_of_ElemSelector : std::false_type {};

template<typename E>
struct is_instance_of_ElemSelector<ElemSelector<E>> : std::true_type {};

// Convenience variable template for the trait above.
template<class T>
constexpr bool is_instance_of_ElemSelector_v = is_instance_of_ElemSelector<T>::value;

// Tag selecting List's private constructor that stores a base element without
// running the consecutive-register check.
struct BaseOnlyTag {};

}  // namespace detail
|
||||||
|
|
||||||
|
// A list of N operands of type T occupying consecutive register indices
// (wrapping modulo 32). Only the first element is stored; the rest are validated
// at construction and then discarded.
template<typename T, std::size_t N>
struct List {
    // Requires exactly N arguments, all of type T, where T derives from VRegArranged
    // or Elem, or is an ElemSelector<...>. Throws "invalid List" if verify() fails.
    template<typename... U>
    constexpr explicit List(U... args)
        : m_base(std::get<0>(std::tie(args...)))
    {
        static_assert((std::is_same_v<T, U> && ...));
        static_assert(sizeof...(args) == N);
        static_assert(std::is_base_of_v<VRegArranged, T> || std::is_base_of_v<Elem, T> || detail::is_instance_of_ElemSelector_v<T>);

        if (!verify(std::index_sequence_for<U...>{}, args...))
            throw "invalid List";
    }

    // Applies operator[] to the stored base element and wraps the result in a List of
    // the same length. The new List is built through the BaseOnlyTag constructor, so
    // no re-validation takes place.
    constexpr auto operator[](unsigned elem_index) const
    {
        using S = decltype(m_base[elem_index]);
        return List<S, N>(detail::BaseOnlyTag{}, m_base[elem_index]);
    }

private:
    template<typename>
    friend class BasicCodeGenerator;
    template<typename, std::size_t>
    friend struct List;

    // Stores `base_` directly, skipping validation.
    constexpr explicit List(detail::BaseOnlyTag, T base_)
        : m_base(base_)
    {}

    // Returns true when the i-th argument has register index (base index + i) % 32.
    // For Elem-derived types the element index must additionally match the base's.
    template<typename... U, std::size_t... indexes>
    constexpr bool verify(std::index_sequence<indexes...>, U... args)
    {
        if constexpr (std::is_base_of_v<VRegArranged, T>) {
            return (((m_base.index() + indexes) % 32 == static_cast<std::size_t>(args.index())) && ...);
        } else if constexpr (std::is_base_of_v<Elem, T>) {
            return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index()) && m_base.elem_index() == args.elem_index()) && ...);
        } else {
            return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index())) && ...);
        }
    }

    T m_base;
};

// Deduction guide: List{a, b, c} deduces T as the common type of the arguments and
// N as their count.
template<typename... U>
List(U...) -> List<std::common_type_t<U...>, sizeof...(U)>;
|
||||||
|
|
||||||
|
} // namespace oaknut
|
19
externals/dynarmic/externals/oaknut/include/oaknut/impl/multi_typed_name.hpp
vendored
Executable file
19
externals/dynarmic/externals/oaknut/include/oaknut/impl/multi_typed_name.hpp
vendored
Executable file
|
@ -0,0 +1,19 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace oaknut {
|
||||||
|
|
||||||
|
// A single name that converts implicitly to each of several constants.
// MultiTypedName<V1, V2, ...> provides one conversion operator per value, each
// returning that value as its own type.
template<auto... Vs>
struct MultiTypedName;

// Recursion terminator: no values, no conversions.
template<>
struct MultiTypedName<> {};

// Peels off the first value, adds its conversion operator, and inherits the rest.
template<auto V, auto... Vs>
struct MultiTypedName<V, Vs...> : public MultiTypedName<Vs...> {
    constexpr operator decltype(V)() const { return V; }
};
|
||||||
|
|
||||||
|
} // namespace oaknut
|
129
externals/dynarmic/externals/oaknut/include/oaknut/impl/offset.hpp
vendored
Executable file
129
externals/dynarmic/externals/oaknut/include/oaknut/impl/offset.hpp
vendored
Executable file
|
@ -0,0 +1,129 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
|
namespace oaknut {
|
||||||
|
|
||||||
|
struct Label;
|
||||||
|
|
||||||
|
namespace detail {

// Returns a mask with every bit at position >= `size` set and the low `size` bits clear.
// A `size` of 64 or more yields 0 instead of performing an out-of-range shift.
constexpr std::uint64_t inverse_mask_from_size(std::size_t size)
{
    if (size >= 64)
        return 0;
    return (~std::uint64_t{0}) << size;
}

// Returns a mask with the low `size` bits set.
// A `size` of 0 yields 0 instead of shifting by the full 64-bit width; `size` is
// expected to be at most 64.
constexpr std::uint64_t mask_from_size(std::size_t size)
{
    if (size == 0)
        return 0;
    return (~std::uint64_t{0}) >> (64 - size);
}

// Treats the low `bit_count` bits of `value` as a two's-complement number and
// replicates its sign bit through bit 63. Bits above `bit_count` in the input are discarded.
template<std::size_t bit_count>
constexpr std::uint64_t sign_extend(std::uint64_t value)
{
    static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
    // Move the field's top bit to bit 63, then shift back arithmetically.
    constexpr std::size_t shift_amount = 64 - bit_count;
    return static_cast<std::uint64_t>(static_cast<std::int64_t>(value << shift_amount) >> shift_amount);
}

}  // namespace detail
|
||||||
|
|
||||||
|
template<std::size_t bitsize, std::size_t alignment>
|
||||||
|
struct AddrOffset {
|
||||||
|
AddrOffset(std::ptrdiff_t diff)
|
||||||
|
: m_payload(encode(diff))
|
||||||
|
{}
|
||||||
|
|
||||||
|
AddrOffset(Label& label)
|
||||||
|
: m_payload(&label)
|
||||||
|
{}
|
||||||
|
|
||||||
|
AddrOffset(void* ptr)
|
||||||
|
: m_payload(ptr)
|
||||||
|
{}
|
||||||
|
|
||||||
|
static std::uint32_t encode(std::ptrdiff_t diff)
|
||||||
|
{
|
||||||
|
const std::uint64_t diff_u64 = static_cast<std::uint64_t>(diff);
|
||||||
|
if (detail::sign_extend<bitsize>(diff_u64) != diff_u64)
|
||||||
|
throw "out of range";
|
||||||
|
if (diff_u64 != (diff_u64 & detail::inverse_mask_from_size(alignment)))
|
||||||
|
throw "misalignment";
|
||||||
|
|
||||||
|
return static_cast<std::uint32_t>((diff_u64 & detail::mask_from_size(bitsize)) >> alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
std::variant<std::uint32_t, Label*, void*> m_payload;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<std::size_t bitsize>
|
||||||
|
struct PageOffset {
|
||||||
|
PageOffset(void* ptr)
|
||||||
|
: m_payload(ptr)
|
||||||
|
{}
|
||||||
|
|
||||||
|
PageOffset(Label& label)
|
||||||
|
: m_payload(&label)
|
||||||
|
{}
|
||||||
|
|
||||||
|
static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target)
|
||||||
|
{
|
||||||
|
const std::int64_t page_diff = (static_cast<std::int64_t>(target) >> 12) - (static_cast<std::int64_t>(current_addr) >> 12);
|
||||||
|
if (detail::sign_extend<bitsize>(page_diff) != page_diff)
|
||||||
|
throw "out of range";
|
||||||
|
return static_cast<std::uint32_t>(page_diff & detail::mask_from_size(bitsize));
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
std::variant<Label*, void*> m_payload;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<std::size_t bitsize, std::size_t alignment>
|
||||||
|
struct SOffset {
|
||||||
|
SOffset(std::int64_t offset)
|
||||||
|
{
|
||||||
|
const std::uint64_t diff_u64 = static_cast<std::uint64_t>(offset);
|
||||||
|
if (detail::sign_extend<bitsize>(diff_u64) != diff_u64)
|
||||||
|
throw "out of range";
|
||||||
|
if (diff_u64 != (diff_u64 & detail::inverse_mask_from_size(alignment)))
|
||||||
|
throw "misalignment";
|
||||||
|
|
||||||
|
m_encoded = static_cast<std::uint32_t>((diff_u64 & detail::mask_from_size(bitsize)) >> alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
std::uint32_t m_encoded;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<std::size_t bitsize, std::size_t alignment>
|
||||||
|
struct POffset {
|
||||||
|
POffset(std::int64_t offset)
|
||||||
|
{
|
||||||
|
const std::uint64_t diff_u64 = static_cast<std::uint64_t>(offset);
|
||||||
|
if (diff_u64 > detail::mask_from_size(bitsize))
|
||||||
|
throw "out of range";
|
||||||
|
if (diff_u64 != (diff_u64 & detail::inverse_mask_from_size(alignment)))
|
||||||
|
throw "misalignment";
|
||||||
|
|
||||||
|
m_encoded = static_cast<std::uint32_t>((diff_u64 & detail::mask_from_size(bitsize)) >> alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
std::uint32_t m_encoded;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace oaknut
|
441
externals/dynarmic/externals/oaknut/include/oaknut/impl/reg.hpp
vendored
Executable file
441
externals/dynarmic/externals/oaknut/include/oaknut/impl/reg.hpp
vendored
Executable file
|
@ -0,0 +1,441 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
namespace oaknut {
|
||||||
|
|
||||||
|
// Forward declarations of the register operand types defined in this header.

// Common base.
struct Reg;

// General-purpose registers and their ZR/SP-aware wrappers.
struct RReg;
struct ZrReg;
struct WzrReg;
struct XReg;
struct WReg;
struct SpReg;
struct WspReg;
struct XRegSp;
// Was `XRegWsp`, a name that is never defined; the 32-bit SP-aware wrapper
// defined in this header is `WRegWsp`.
struct WRegWsp;

// Vector / floating-point registers: scalar views and arranged views.
struct VReg;
struct VRegArranged;
struct BReg;
struct HReg;
struct SReg;
struct DReg;
struct QReg;
struct VReg_8B;
struct VReg_4H;
struct VReg_2S;
struct VReg_1D;
struct VReg_16B;
struct VReg_8H;
struct VReg_4S;
struct VReg_2D;
struct VReg_1Q;

// Entry point for the V<n>.<arrangement>() / V<n>.<lane>()[i] syntax.
struct VRegSelector;

// Single vector lanes.
template<typename Elem>
struct ElemSelector;
struct BElem;
struct HElem;
struct SElem;
struct DElem;
|
||||||
|
|
||||||
|
// Common base of every register operand: a register index, a width in bits and
// a flag telling general-purpose and vector registers apart.
struct Reg {
    // `index_` must lie in [-1, 31] and `bitsize_` must be a non-zero power of
    // two; both are checked with assert only.
    constexpr explicit Reg(bool is_vector_, unsigned bitsize_, int index_)
        : m_index(index_), m_bitsize(bitsize_), m_is_vector(is_vector_)
    {
        assert(index_ >= -1 && index_ <= 31);
        assert(bitsize_ != 0 && (bitsize_ & (bitsize_ - 1)) == 0 && "Bitsize must be a power of two");
    }

    // Register number as passed to the constructor.
    constexpr int index() const
    {
        return m_index;
    }

    // Register width in bits.
    constexpr unsigned bitsize() const
    {
        return m_bitsize;
    }

    // True for vector registers, false for general-purpose ones.
    constexpr bool is_vector() const
    {
        return m_is_vector;
    }

private:
    // Bit-fields keep the operand small; 8 bits hold -1..31 and widths up to 128.
    int m_index : 8;
    unsigned m_bitsize : 8;
    bool m_is_vector;
};
|
||||||
|
|
||||||
|
struct RReg : public Reg {
|
||||||
|
constexpr explicit RReg(unsigned bitsize_, int index_)
|
||||||
|
: Reg(false, bitsize_, index_)
|
||||||
|
{
|
||||||
|
assert(bitsize_ == 32 || bitsize_ == 64);
|
||||||
|
}
|
||||||
|
|
||||||
|
XReg toX() const;
|
||||||
|
WReg toW() const;
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ZrReg : public RReg {
|
||||||
|
constexpr explicit ZrReg()
|
||||||
|
: RReg(64, 31) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct WzrReg : public RReg {
|
||||||
|
constexpr explicit WzrReg()
|
||||||
|
: RReg(32, 31) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct XReg : public RReg {
|
||||||
|
constexpr explicit XReg(int index_)
|
||||||
|
: RReg(64, index_) {}
|
||||||
|
|
||||||
|
constexpr /* implicit */ XReg(ZrReg)
|
||||||
|
: RReg(64, 31) {}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct WReg : public RReg {
|
||||||
|
constexpr explicit WReg(int index_)
|
||||||
|
: RReg(32, index_) {}
|
||||||
|
|
||||||
|
constexpr /* implicit */ WReg(WzrReg)
|
||||||
|
: RReg(32, 31) {}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline XReg RReg::toX() const
|
||||||
|
{
|
||||||
|
if (index() == -1)
|
||||||
|
throw "cannot convert SP/WSP to XReg";
|
||||||
|
return XReg{index()};
|
||||||
|
}
|
||||||
|
|
||||||
|
inline WReg RReg::toW() const
|
||||||
|
{
|
||||||
|
if (index() == -1)
|
||||||
|
throw "cannot convert SP/WSP to WReg";
|
||||||
|
return WReg{index()};
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SpReg : public RReg {
|
||||||
|
constexpr explicit SpReg()
|
||||||
|
: RReg(64, -1) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct WspReg : public RReg {
|
||||||
|
constexpr explicit WspReg()
|
||||||
|
: RReg(64, -1) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct XRegSp : public RReg {
|
||||||
|
constexpr /* implict */ XRegSp(SpReg)
|
||||||
|
: RReg(64, -1) {}
|
||||||
|
|
||||||
|
constexpr /* implict */ XRegSp(XReg xr)
|
||||||
|
: RReg(64, xr.index())
|
||||||
|
{
|
||||||
|
if (xr.index() == 31)
|
||||||
|
throw "unexpected ZR passed into an XRegSp";
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct WRegWsp : public RReg {
|
||||||
|
constexpr /* implict */ WRegWsp(WspReg)
|
||||||
|
: RReg(32, -1) {}
|
||||||
|
|
||||||
|
constexpr /* implict */ WRegWsp(WReg wr)
|
||||||
|
: RReg(32, wr.index())
|
||||||
|
{
|
||||||
|
if (wr.index() == 31)
|
||||||
|
throw "unexpected WZR passed into an WRegWsp";
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg : public Reg {
|
||||||
|
constexpr explicit VReg(unsigned bitsize_, int index_)
|
||||||
|
: Reg(true, bitsize_, index_)
|
||||||
|
{
|
||||||
|
assert(bitsize_ == 8 || bitsize_ == 16 || bitsize_ == 32 || bitsize_ == 64 || bitsize_ == 128);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr BReg toB() const;
|
||||||
|
constexpr HReg toH() const;
|
||||||
|
constexpr SReg toS() const;
|
||||||
|
constexpr DReg toD() const;
|
||||||
|
constexpr QReg toQ() const;
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VRegArranged : public Reg {
|
||||||
|
constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_)
|
||||||
|
: Reg(true, bitsize_, index_), m_esize(esize_)
|
||||||
|
{
|
||||||
|
assert(bitsize_ == 64 || bitsize_ == 128);
|
||||||
|
assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two");
|
||||||
|
assert(esize_ <= bitsize_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
|
||||||
|
private:
|
||||||
|
int m_esize : 8;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct BReg : public VReg {
|
||||||
|
constexpr explicit BReg(int index_)
|
||||||
|
: VReg(8, index_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct HReg : public VReg {
|
||||||
|
constexpr explicit HReg(int index_)
|
||||||
|
: VReg(16, index_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SReg : public VReg {
|
||||||
|
constexpr explicit SReg(int index_)
|
||||||
|
: VReg(32, index_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DReg : public VReg {
|
||||||
|
constexpr explicit DReg(int index_)
|
||||||
|
: VReg(64, index_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct QReg : public VReg {
|
||||||
|
constexpr explicit QReg(int index_)
|
||||||
|
: VReg(128, index_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_8B : public VRegArranged {
|
||||||
|
constexpr explicit VReg_8B(int reg_index_)
|
||||||
|
: VRegArranged(64, reg_index_, 64 / 8)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_4H : public VRegArranged {
|
||||||
|
constexpr explicit VReg_4H(int reg_index_)
|
||||||
|
: VRegArranged(64, reg_index_, 64 / 4)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_2S : public VRegArranged {
|
||||||
|
constexpr explicit VReg_2S(int reg_index_)
|
||||||
|
: VRegArranged(64, reg_index_, 64 / 2)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_1D : public VRegArranged {
|
||||||
|
constexpr explicit VReg_1D(int reg_index_)
|
||||||
|
: VRegArranged(64, reg_index_, 64 / 1)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_16B : public VRegArranged {
|
||||||
|
constexpr explicit VReg_16B(int reg_index_)
|
||||||
|
: VRegArranged(128, reg_index_, 128 / 16)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_8H : public VRegArranged {
|
||||||
|
constexpr explicit VReg_8H(int reg_index_)
|
||||||
|
: VRegArranged(128, reg_index_, 128 / 8)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_4S : public VRegArranged {
|
||||||
|
constexpr explicit VReg_4S(int reg_index_)
|
||||||
|
: VRegArranged(128, reg_index_, 128 / 4)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_2D : public VRegArranged {
|
||||||
|
constexpr explicit VReg_2D(int reg_index_)
|
||||||
|
: VRegArranged(128, reg_index_, 128 / 2)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VReg_1Q : public VRegArranged {
|
||||||
|
constexpr explicit VReg_1Q(int reg_index_)
|
||||||
|
: VRegArranged(128, reg_index_, 128 / 1)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
friend class BasicCodeGenerator;
|
||||||
|
};
|
||||||
|
|
||||||
|
// One lane of a 128-bit vector register, e.g. V3.S()[2].
//
// `esize_` is the lane width in BITS, matching the `128 / esize_` bound below.
// The derived types previously passed 2/2/4/8, which let the bound accept lane
// indices up to 63/63/31/15 instead of 15/7/3/1.
// NOTE(review): esize() now reports 8/16/32/64; no caller of esize() is visible
// in this header — confirm none elsewhere relied on the old values.
struct Elem {
    // Throws the string literal "invalid elem_index" when `elem_index_` is not
    // a valid lane for a 128-bit register with lanes of `esize_` bits.
    constexpr explicit Elem(unsigned esize_, int reg_, unsigned elem_index_)
        : m_esize(esize_), m_reg(reg_), m_elem_index(elem_index_)
    {
        if (elem_index_ >= 128 / esize_)
            throw "invalid elem_index";
    }

    // Lane width in bits.
    constexpr unsigned esize() const { return m_esize; }
    // Index of the vector register the lane belongs to.
    constexpr int reg_index() const { return m_reg; }
    // Lane number within the register.
    constexpr unsigned elem_index() const { return m_elem_index; }

private:
    unsigned m_esize;
    int m_reg;
    unsigned m_elem_index;
};

// Intermediate object returned by VRegSelector::B()/H()/S()/D(); indexing it
// yields the lane type `E` for the remembered register.
template<typename E>
struct ElemSelector {
    constexpr explicit ElemSelector(int reg_index_)
        : m_reg_index(reg_index_)
    {}

    constexpr int reg_index() const { return m_reg_index; }

    // Builds lane `elem_index`; E's constructor rejects out-of-range lanes.
    constexpr E operator[](unsigned elem_index) const { return E{m_reg_index, elem_index}; }

private:
    int m_reg_index;
};

// 8-bit lane: valid indices 0..15.
struct BElem : public Elem {
    constexpr explicit BElem(int reg_, unsigned elem_index_)
        : Elem(8, reg_, elem_index_)
    {}
};

// 16-bit lane: valid indices 0..7.
struct HElem : public Elem {
    constexpr explicit HElem(int reg_, unsigned elem_index_)
        : Elem(16, reg_, elem_index_)
    {}
};

// 32-bit lane: valid indices 0..3.
struct SElem : public Elem {
    constexpr explicit SElem(int reg_, unsigned elem_index_)
        : Elem(32, reg_, elem_index_)
    {}
};

// 64-bit lane: valid indices 0..1.
struct DElem : public Elem {
    constexpr explicit DElem(int reg_, unsigned elem_index_)
        : Elem(64, reg_, elem_index_)
    {}
};

// A 64-bit lane that must be lane 1.
struct DElem_1 : public DElem {
    // Throws the string literal "invalid DElem_1" for any other lane.
    constexpr /* implicit */ DElem_1(DElem inner)
        : DElem(inner)
    {
        if (inner.elem_index() != 1)
            throw "invalid DElem_1";
    }
};
|
||||||
|
|
||||||
|
constexpr BReg VReg::toB() const
|
||||||
|
{
|
||||||
|
return BReg{index()};
|
||||||
|
}
|
||||||
|
constexpr HReg VReg::toH() const
|
||||||
|
{
|
||||||
|
return HReg{index()};
|
||||||
|
}
|
||||||
|
constexpr SReg VReg::toS() const
|
||||||
|
{
|
||||||
|
return SReg{index()};
|
||||||
|
}
|
||||||
|
constexpr DReg VReg::toD() const
|
||||||
|
{
|
||||||
|
return DReg{index()};
|
||||||
|
}
|
||||||
|
constexpr QReg VReg::toQ() const
|
||||||
|
{
|
||||||
|
return QReg{index()};
|
||||||
|
}
|
||||||
|
|
||||||
|
struct VRegSelector {
|
||||||
|
constexpr explicit VRegSelector(int reg_index)
|
||||||
|
: m_reg_index(reg_index)
|
||||||
|
{}
|
||||||
|
|
||||||
|
constexpr int index() const { return m_reg_index; }
|
||||||
|
|
||||||
|
constexpr ElemSelector<BElem> B() const { return ElemSelector<BElem>(index()); }
|
||||||
|
constexpr ElemSelector<HElem> H() const { return ElemSelector<HElem>(index()); }
|
||||||
|
constexpr ElemSelector<SElem> S() const { return ElemSelector<SElem>(index()); }
|
||||||
|
constexpr ElemSelector<DElem> D() const { return ElemSelector<DElem>(index()); }
|
||||||
|
|
||||||
|
constexpr VReg_8B B8() const { return VReg_8B{index()}; }
|
||||||
|
constexpr VReg_4H H4() const { return VReg_4H{index()}; }
|
||||||
|
constexpr VReg_2S S2() const { return VReg_2S{index()}; }
|
||||||
|
constexpr VReg_1D D1() const { return VReg_1D{index()}; }
|
||||||
|
constexpr VReg_16B B16() const { return VReg_16B{index()}; }
|
||||||
|
constexpr VReg_8H H8() const { return VReg_8H{index()}; }
|
||||||
|
constexpr VReg_4S S4() const { return VReg_4S{index()}; }
|
||||||
|
constexpr VReg_2D D2() const { return VReg_2D{index()}; }
|
||||||
|
constexpr VReg_1Q Q1() const { return VReg_1Q{index()}; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
int m_reg_index;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace oaknut
|
24
externals/dynarmic/externals/oaknut/include/oaknut/impl/string_literal.hpp
vendored
Executable file
24
externals/dynarmic/externals/oaknut/include/oaknut/impl/string_literal.hpp
vendored
Executable file
|
@ -0,0 +1,24 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
namespace oaknut {
|
||||||
|
|
||||||
|
// Compile-time copy of a string literal, usable as a non-type template
// argument. `N` counts the terminating NUL, which is copied as well.
//
// The template parameter is spelled `std::size_t`, as the members already are:
// <cstddef> only guarantees the name inside namespace std.
template<std::size_t N>
struct StringLiteral {
    // Copies all N characters of `str`, terminator included, into `value`.
    constexpr StringLiteral(const char (&str)[N])
    {
        std::copy_n(str, N, value);
    }

    // Number of characters excluding the terminating NUL.
    static constexpr std::size_t strlen = N - 1;
    // Number of characters including the terminating NUL.
    static constexpr std::size_t size = N;

    char value[N];
};
|
||||||
|
|
||||||
|
} // namespace oaknut
|
306
externals/dynarmic/externals/oaknut/include/oaknut/oaknut.hpp
vendored
Executable file
306
externals/dynarmic/externals/oaknut/include/oaknut/oaknut.hpp
vendored
Executable file
|
@ -0,0 +1,306 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
// Include guard: without it a second inclusion of this header redefines every
// type below.
#pragma once

#include <bit>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <tuple>
#include <type_traits>
#include <utility>  // std::forward, used by BasicCodeGenerator::emit
#include <variant>
#include <vector>

#include "oaknut/impl/enum.hpp"
#include "oaknut/impl/imm.hpp"
#include "oaknut/impl/list.hpp"
#include "oaknut/impl/multi_typed_name.hpp"
#include "oaknut/impl/offset.hpp"
#include "oaknut/impl/reg.hpp"
#include "oaknut/impl/string_literal.hpp"
|
||||||
|
|
||||||
|
namespace oaknut {
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
template<StringLiteral bs, StringLiteral barg>
|
||||||
|
constexpr std::uint32_t get_bits()
|
||||||
|
{
|
||||||
|
std::uint32_t result = 0;
|
||||||
|
for (std::size_t i = 0; i < 32; i++) {
|
||||||
|
for (std::size_t a = 0; a < barg.strlen; a++) {
|
||||||
|
if (bs.value[i] == barg.value[a]) {
|
||||||
|
result |= 1 << (31 - i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combines several callables into one object whose call operator is the
// overload set of all of them; used as the visitor passed to std::visit.
template<typename... Fs>
struct overloaded : Fs... {
    using Fs::operator()...;
};

// Lets `overloaded{f, g, ...}` deduce its template arguments.
template<typename... Fs>
overloaded(Fs...) -> overloaded<Fs...>;
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
|
// A position in the generated code that instructions can refer to before or
// after it is known. Everything except default construction is reserved for
// BasicCodeGenerator.
struct Label {
public:
    // Creates a label with no address yet.
    Label() = default;

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    // Creates a label that already has an address.
    explicit Label(std::uintptr_t addr)
        : m_addr{addr}
    {}

    // Computes the bits to merge into the instruction at `wb_addr` once the
    // label's address `resolved_addr` is known.
    using EmitFunctionType = std::uint32_t (*)(std::uintptr_t wb_addr, std::uintptr_t resolved_addr);

    // One instruction waiting for this label's address.
    struct Writeback {
        std::uintptr_t m_wb_addr;  // address of the instruction to patch
        std::uint32_t m_mask;      // bits of that instruction to keep
        EmitFunctionType m_fn;     // produces the bits to OR in
    };

    // Empty until the label has an address.
    std::optional<std::uintptr_t> m_addr;
    // Instructions recorded while the label had no address.
    std::vector<Writeback> m_wbs;
};
|
||||||
|
|
||||||
|
template<typename Policy>
|
||||||
|
class BasicCodeGenerator : public Policy {
|
||||||
|
public:
|
||||||
|
BasicCodeGenerator(typename Policy::constructor_argument_type arg)
|
||||||
|
: Policy(arg)
|
||||||
|
{}
|
||||||
|
|
||||||
|
// Returns a label whose address is the policy's current address.
Label l()
{
    const std::uintptr_t here = Policy::current_address();
    return Label{here};
}
|
||||||
|
|
||||||
|
// Gives `label` the policy's current address and patches every instruction
// that was recorded against it while it had none.
//
// Throws the string literal "label already resolved" when the label already
// has an address.
void l(Label& label)
{
    if (label.m_addr.has_value())
        throw "label already resolved";

    const auto here = Policy::current_address();
    label.m_addr = here;

    for (auto& pending : label.m_wbs) {
        // Recompute the operand bits now that the target is known, then merge
        // them into the instruction that was emitted with the field left zero.
        const std::uint32_t bits = pending.m_fn(pending.m_wb_addr, here);
        Policy::set_at_address(pending.m_wb_addr, bits, pending.m_mask);
    }

    label.m_wbs.clear();
}
|
||||||
|
|
||||||
|
#include "oaknut/impl/arm64_mnemonics.inc.hpp"
|
||||||
|
#include "oaknut/impl/fpsimd_mnemonics.inc.hpp"
|
||||||
|
|
||||||
|
void RET()
|
||||||
|
{
|
||||||
|
return RET(XReg{30});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loads the 32-bit constant `imm` into `wd` using the shortest sequence this
// helper knows: MOVZ, MOVN, ORR with a bitmask immediate, or MOVZ + MOVK.
// A destination with index 31 emits nothing.
//
// MOVN fix: the inverted value used to be widened to 64 bits before the
// validity test, which left the upper 32 bits all ones, so the test could not
// pass and the branch was dead. The branch also handed MOVN `imm` itself,
// whereas MOVN writes the bitwise NOT of its operand and therefore needs `~imm`.
// NOTE(review): assumes MovImm16::is_valid accepts a single 16-bit chunk at a
// halfword-aligned shift — confirm against imm.hpp.
void MOV(WReg wd, uint32_t imm)
{
    if (wd.index() == 31)
        return;
    if (MovImm16::is_valid(imm))
        return MOVZ(wd, imm);

    const std::uint32_t inverted = ~imm;
    if (MovImm16::is_valid(inverted))
        return MOVN(wd, inverted);

    if (detail::encode_bit_imm(imm))
        return ORR(wd, WzrReg{}, imm);

    // General case: low halfword with MOVZ, high halfword with MOVK.
    MOVZ(wd, {static_cast<std::uint16_t>(imm >> 0), MovImm16Shift::SHL_0});
    MOVK(wd, {static_cast<std::uint16_t>(imm >> 16), MovImm16Shift::SHL_16});
}
|
||||||
|
|
||||||
|
// Loads the 64-bit constant `imm` into `xd`. A destination with index 31
// emits nothing. Strategies are tried in this order:
//   1. upper 32 bits zero -> the 32-bit MOV on the W view of the register;
//   2. single MOVZ, single MOVN, or ORR with a bitmask immediate;
//   3. otherwise a chain built 16 bits at a time, optionally starting with a
//      32-bit ORR when the low word is a bitmask immediate.
//
// MOVN fix: this branch is taken when `~imm` is a valid MovImm16, so MOVN must
// be given `~imm` (MOVN writes the bitwise NOT of its operand). The previous
// code passed `imm`, which is not a valid MovImm16 on this path.
void MOV(XReg xd, uint64_t imm)
{
    if (xd.index() == 31)
        return;
    if (imm >> 32 == 0)
        return MOV(xd.toW(), static_cast<std::uint32_t>(imm));
    if (MovImm16::is_valid(imm))
        return MOVZ(xd, imm);
    if (MovImm16::is_valid(~imm))
        return MOVN(xd, ~imm);
    if (detail::encode_bit_imm(imm))
        return ORR(xd, ZrReg{}, imm);

    // True once the register has been fully initialised; from then on only
    // MOVK (which keeps the other halfwords) may be used.
    bool movz_done = false;
    // Index of the halfword held in the low 16 bits of `imm`.
    int shift_count = 0;

    if (detail::encode_bit_imm(static_cast<std::uint32_t>(imm))) {
        // The low word fits a single 32-bit ORR; continue with the high word.
        ORR(xd.toW(), WzrReg{}, static_cast<std::uint32_t>(imm));
        imm >>= 32;
        movz_done = true;
        shift_count = 2;
    }

    while (imm != 0) {
        const uint16_t hw = static_cast<uint16_t>(imm);
        if (hw != 0) {
            if (movz_done) {
                MOVK(xd, {hw, static_cast<MovImm16Shift>(shift_count)});
            } else {
                MOVZ(xd, {hw, static_cast<MovImm16Shift>(shift_count)});
                movz_done = true;
            }
        }
        imm >>= 16;
        shift_count++;
    }
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
#include "oaknut/impl/arm64_encode_helpers.inc.hpp"
|
||||||
|
|
||||||
|
template<StringLiteral bs, StringLiteral... bargs, typename... Ts>
|
||||||
|
void emit(Ts... args)
|
||||||
|
{
|
||||||
|
std::uint32_t encoding = detail::get_bits<bs, "1">();
|
||||||
|
encoding |= (0 | ... | encode<detail::get_bits<bs, bargs>()>(std::forward<Ts>(args)));
|
||||||
|
Policy::append(encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<std::uint32_t splat, std::size_t size, std::size_t align>
|
||||||
|
std::uint32_t encode(AddrOffset<size, align> v)
|
||||||
|
{
|
||||||
|
static_assert(std::popcount(splat) == size - align);
|
||||||
|
|
||||||
|
const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
|
||||||
|
const std::ptrdiff_t diff = target - current_addr;
|
||||||
|
return pdep<splat>(AddrOffset<size, align>::encode(diff));
|
||||||
|
};
|
||||||
|
|
||||||
|
return std::visit(detail::overloaded{
|
||||||
|
[&](std::uint32_t encoding) {
|
||||||
|
return pdep<splat>(encoding);
|
||||||
|
},
|
||||||
|
[&](Label* label) {
|
||||||
|
if (label->m_addr) {
|
||||||
|
return encode_fn(Policy::current_address(), *label->m_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
|
||||||
|
return 0u;
|
||||||
|
},
|
||||||
|
[&](void* p) {
|
||||||
|
return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
|
||||||
|
},
|
||||||
|
},
|
||||||
|
v.m_payload);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<std::uint32_t splat, std::size_t size>
|
||||||
|
std::uint32_t encode(PageOffset<size> v)
|
||||||
|
{
|
||||||
|
static_assert(std::popcount(splat) == size);
|
||||||
|
|
||||||
|
const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
|
||||||
|
return pdep<splat>(PageOffset<size>::encode(current_addr, target));
|
||||||
|
};
|
||||||
|
|
||||||
|
return std::visit(detail::overloaded{
|
||||||
|
[&](Label* label) {
|
||||||
|
if (label->m_addr) {
|
||||||
|
return encode_fn(Policy::current_address(), *label->m_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
|
||||||
|
return 0u;
|
||||||
|
},
|
||||||
|
[&](void* p) {
|
||||||
|
return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
|
||||||
|
},
|
||||||
|
},
|
||||||
|
v.m_payload);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Output policy that writes instruction words through a raw pointer which
// advances by one word per appended instruction. No bounds are tracked.
struct PointerCodeGeneratorPolicy {
public:
    // Returns the current write position converted to pointer type `T`.
    template<typename T>
    T ptr()
    {
        static_assert(std::is_pointer_v<T>);
        return reinterpret_cast<T>(m_ptr);
    }

    // Moves the write position to `ptr_`.
    void set_ptr(std::uint32_t* ptr_)
    {
        m_ptr = ptr_;
    }

protected:
    using constructor_argument_type = std::uint32_t*;

    PointerCodeGeneratorPolicy(std::uint32_t* ptr_)
        : m_ptr{ptr_}
    {}

    // Stores `instruction` at the write position, then advances it.
    void append(std::uint32_t instruction)
    {
        *m_ptr = instruction;
        ++m_ptr;
    }

    // Current write position as an integer address.
    std::uintptr_t current_address()
    {
        return reinterpret_cast<std::uintptr_t>(m_ptr);
    }

    // Patches the word at `addr`: bits selected by `mask` are kept and
    // `value` is OR-ed in.
    void set_at_address(std::uintptr_t addr, std::uint32_t value, std::uint32_t mask)
    {
        auto* const word = reinterpret_cast<std::uint32_t*>(addr);
        const std::uint32_t kept = *word & mask;
        *word = kept | value;
    }

private:
    std::uint32_t* m_ptr;
};
|
||||||
|
|
||||||
|
using CodeGenerator = BasicCodeGenerator<PointerCodeGeneratorPolicy>;
|
||||||
|
|
||||||
|
namespace util {
|
||||||
|
|
||||||
|
inline constexpr WReg W0{0}, W1{1}, W2{2}, W3{3}, W4{4}, W5{5}, W6{6}, W7{7}, W8{8}, W9{9}, W10{10}, W11{11}, W12{12}, W13{13}, W14{14}, W15{15}, W16{16}, W17{17}, W18{18}, W19{19}, W20{20}, W21{21}, W22{22}, W23{23}, W24{24}, W25{25}, W26{26}, W27{27}, W28{28}, W29{29}, W30{30};
|
||||||
|
inline constexpr XReg X0{0}, X1{1}, X2{2}, X3{3}, X4{4}, X5{5}, X6{6}, X7{7}, X8{8}, X9{9}, X10{10}, X11{11}, X12{12}, X13{13}, X14{14}, X15{15}, X16{16}, X17{17}, X18{18}, X19{19}, X20{20}, X21{21}, X22{22}, X23{23}, X24{24}, X25{25}, X26{26}, X27{27}, X28{28}, X29{29}, X30{30};
|
||||||
|
inline constexpr ZrReg ZR{}, XZR{};
|
||||||
|
inline constexpr WzrReg WZR{};
|
||||||
|
inline constexpr SpReg SP{}, XSP{};
|
||||||
|
inline constexpr WspReg WSP{};
|
||||||
|
|
||||||
|
inline constexpr VRegSelector V0{0}, V1{1}, V2{2}, V3{3}, V4{4}, V5{5}, V6{6}, V7{7}, V8{8}, V9{9}, V10{10}, V11{11}, V12{12}, V13{13}, V14{14}, V15{15}, V16{16}, V17{17}, V18{18}, V19{19}, V20{20}, V21{21}, V22{22}, V23{23}, V24{24}, V25{25}, V26{26}, V27{27}, V28{28}, V29{29}, V30{30}, V31{31};
|
||||||
|
inline constexpr QReg Q0{0}, Q1{1}, Q2{2}, Q3{3}, Q4{4}, Q5{5}, Q6{6}, Q7{7}, Q8{8}, Q9{9}, Q10{10}, Q11{11}, Q12{12}, Q13{13}, Q14{14}, Q15{15}, Q16{16}, Q17{17}, Q18{18}, Q19{19}, Q20{20}, Q21{21}, Q22{22}, Q23{23}, Q24{24}, Q25{25}, Q26{26}, Q27{27}, Q28{28}, Q29{29}, Q30{30}, Q31{31};
|
||||||
|
inline constexpr DReg D0{0}, D1{1}, D2{2}, D3{3}, D4{4}, D5{5}, D6{6}, D7{7}, D8{8}, D9{9}, D10{10}, D11{11}, D12{12}, D13{13}, D14{14}, D15{15}, D16{16}, D17{17}, D18{18}, D19{19}, D20{20}, D21{21}, D22{22}, D23{23}, D24{24}, D25{25}, D26{26}, D27{27}, D28{28}, D29{29}, D30{30}, D31{31};
|
||||||
|
inline constexpr SReg S0{0}, S1{1}, S2{2}, S3{3}, S4{4}, S5{5}, S6{6}, S7{7}, S8{8}, S9{9}, S10{10}, S11{11}, S12{12}, S13{13}, S14{14}, S15{15}, S16{16}, S17{17}, S18{18}, S19{19}, S20{20}, S21{21}, S22{22}, S23{23}, S24{24}, S25{25}, S26{26}, S27{27}, S28{28}, S29{29}, S30{30}, S31{31};
|
||||||
|
inline constexpr HReg H0{0}, H1{1}, H2{2}, H3{3}, H4{4}, H5{5}, H6{6}, H7{7}, H8{8}, H9{9}, H10{10}, H11{11}, H12{12}, H13{13}, H14{14}, H15{15}, H16{16}, H17{17}, H18{18}, H19{19}, H20{20}, H21{21}, H22{22}, H23{23}, H24{24}, H25{25}, H26{26}, H27{27}, H28{28}, H29{29}, H30{30}, H31{31};
|
||||||
|
inline constexpr BReg B0{0}, B1{1}, B2{2}, B3{3}, B4{4}, B5{5}, B6{6}, B7{7}, B8{8}, B9{9}, B10{10}, B11{11}, B12{12}, B13{13}, B14{14}, B15{15}, B16{16}, B17{17}, B18{18}, B19{19}, B20{20}, B21{21}, B22{22}, B23{23}, B24{24}, B25{25}, B26{26}, B27{27}, B28{28}, B29{29}, B30{30}, B31{31};
|
||||||
|
|
||||||
|
inline constexpr Cond EQ{Cond::EQ}, NE{Cond::NE}, CS{Cond::CS}, CC{Cond::CC}, MI{Cond::MI}, PL{Cond::PL}, VS{Cond::VS}, VC{Cond::VC}, HI{Cond::HI}, LS{Cond::LS}, GE{Cond::GE}, LT{Cond::LT}, GT{Cond::GT}, LE{Cond::LE}, AL{Cond::AL}, NV{Cond::NV}, HS{Cond::HS}, LO{Cond::LO};
|
||||||
|
|
||||||
|
inline constexpr auto UXTB{MultiTypedName<AddSubExt::UXTB>{}};
|
||||||
|
inline constexpr auto UXTH{MultiTypedName<AddSubExt::UXTH>{}};
|
||||||
|
inline constexpr auto UXTW{MultiTypedName<AddSubExt::UXTW, IndexExt::UXTW>{}};
|
||||||
|
inline constexpr auto UXTX{MultiTypedName<AddSubExt::UXTX>{}};
|
||||||
|
inline constexpr auto SXTB{MultiTypedName<AddSubExt::SXTB>{}};
|
||||||
|
inline constexpr auto SXTH{MultiTypedName<AddSubExt::SXTH>{}};
|
||||||
|
inline constexpr auto SXTW{MultiTypedName<AddSubExt::SXTW, IndexExt::SXTW>{}};
|
||||||
|
inline constexpr auto SXTX{MultiTypedName<AddSubExt::SXTX, IndexExt::SXTX>{}};
|
||||||
|
inline constexpr auto LSL{MultiTypedName<AddSubExt::LSL, IndexExt::LSL, AddSubShift::LSL, LogShift::LSL, LslSymbol::LSL>{}};
|
||||||
|
inline constexpr auto LSR{MultiTypedName<AddSubShift::LSR, LogShift::LSR>{}};
|
||||||
|
inline constexpr auto ASR{MultiTypedName<AddSubShift::ASR, LogShift::ASR>{}};
|
||||||
|
inline constexpr auto ROR{MultiTypedName<LogShift::ROR>{}};
|
||||||
|
|
||||||
|
inline constexpr PostIndexed POST_INDEXED{};
|
||||||
|
inline constexpr PreIndexed PRE_INDEXED{};
|
||||||
|
inline constexpr MslSymbol MSL{MslSymbol::MSL};
|
||||||
|
|
||||||
|
} // namespace util
|
||||||
|
|
||||||
|
} // namespace oaknut
|
117
externals/dynarmic/externals/oaknut/tests/basic.cpp
vendored
Executable file
117
externals/dynarmic/externals/oaknut/tests/basic.cpp
vendored
Executable file
|
@ -0,0 +1,117 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#include <catch2/catch_test_macros.hpp>
|
||||||
|
|
||||||
|
#include "oaknut/code_block.hpp"
|
||||||
|
#include "oaknut/oaknut.hpp"
|
||||||
|
#include "rand_int.hpp"
|
||||||
|
|
||||||
|
using namespace oaknut;
|
||||||
|
using namespace oaknut::util;
|
||||||
|
|
||||||
|
// Smallest end-to-end check: generate `return 42;` and execute it.
TEST_CASE("Basic Test")
{
    CodeBlock mem{4096};
    CodeGenerator code{mem.ptr()};

    // Emit the function body while the block is unprotected.
    mem.unprotect();
    code.MOV(W0, 42);
    code.RET();
    mem.protect();
    mem.invalidate_all();

    // Call the start of the block as `int()`.
    using EntryPoint = int (*)();
    const auto entry = (EntryPoint)mem.ptr();
    int result = entry();
    REQUIRE(result == 42);
}
|
||||||
|
|
||||||
|
// Generates a recursive Fibonacci function and checks a few values. Exercises
// forward and backward label references, conditional branches and calls.
TEST_CASE("Fibonacci")
{
    CodeBlock mem{4096};
    CodeGenerator code{mem.ptr()};

    mem.unprotect();

    // Entry point is wherever the generator currently stands.
    auto fib = code.ptr<int (*)(int)>();
    Label start, end, zero, recurse;

    // Prologue: save frame/link registers and the two callee-saved temporaries.
    code.l(start);
    code.STP(X29, X30, SP, PRE_INDEXED, -32);
    code.STP(X20, X19, SP, 16);
    code.MOV(X29, SP);

    // W19 = n; W0 = n - 1, with flags set for the dispatch below.
    code.MOV(W19, W0);
    code.SUBS(W0, W0, 1);
    code.B(LT, zero);     // n < 1
    code.B(NE, recurse);  // n > 1

    // n == 1
    code.MOV(W0, 1);
    code.B(end);

    // n < 1
    code.l(zero);
    code.MOV(W0, WZR);
    code.B(end);

    // n > 1: fib(n - 1) + fib(n - 2)
    code.l(recurse);
    code.BL(start);
    code.MOV(W20, W0);
    code.SUB(W0, W19, 2);
    code.BL(start);
    code.ADD(W0, W0, W20);

    // Epilogue: restore saved registers and return.
    code.l(end);
    code.LDP(X20, X19, SP, 16);
    code.LDP(X29, X30, SP, POST_INDEXED, 32);
    code.RET();

    mem.protect();
    mem.invalidate_all();

    REQUIRE(fib(0) == 0);
    REQUIRE(fib(1) == 1);
    REQUIRE(fib(5) == 5);
    REQUIRE(fib(9) == 34);
}
|
||||||
|
|
||||||
|
// Randomised check of MOV(WReg, imm): for each random 32-bit value, generate
// a function that loads it into W0 and returns, then compare the result.
TEST_CASE("Immediate generation (32-bit)")
{
    CodeBlock mem{4096};

    for (int iteration = 0; iteration < 0x100000; ++iteration) {
        const std::uint32_t value = RandInt<std::uint32_t>(0, 0xffffffff);

        // A fresh generator per iteration starts again at the top of the block.
        CodeGenerator code{mem.ptr()};
        auto f = code.ptr<std::uint64_t (*)()>();

        mem.unprotect();
        code.MOV(W0, value);
        code.RET();
        mem.protect();
        mem.invalidate_all();

        REQUIRE(f() == value);
    }
}
|
||||||
|
|
||||||
|
// Randomised check of 64-bit immediate materialisation: for each random value,
// emit `MOV X0, value; RET` at the start of the block, run it and compare the
// returned register contents against the value that was requested.
TEST_CASE("Immediate generation (64-bit)")
{
    constexpr int iteration_count = 0x100000;

    CodeBlock block{4096};

    for (int iteration = 0; iteration < iteration_count; ++iteration) {
        const std::uint64_t value = RandInt<std::uint64_t>(0, 0xffffffff'ffffffff);

        // A fresh generator each round, so every routine overwrites the last one.
        CodeGenerator emitter{block.ptr()};
        auto f = emitter.ptr<std::uint64_t (*)()>();

        block.unprotect();
        emitter.MOV(X0, value);
        emitter.RET();
        block.protect();
        block.invalidate_all();

        REQUIRE(f() == value);
    }
}
|
785
externals/dynarmic/externals/oaknut/tests/fpsimd.cpp
vendored
Executable file
785
externals/dynarmic/externals/oaknut/tests/fpsimd.cpp
vendored
Executable file
|
@ -0,0 +1,785 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
#include <catch2/catch_test_macros.hpp>
|
||||||
|
|
||||||
|
#include "oaknut/oaknut.hpp"
|
||||||
|
|
||||||
|
// T(HEX, CMD): defines one encoding test.
//
//   HEX - the expected 32-bit instruction word
//   CMD - a CodeGenerator member call (e.g. `ABS(D1, D27)`); its source text
//         also becomes the Catch2 test-case name via #CMD
//
// The generator is pointed at a single local 32-bit word, so CMD must emit
// exactly one instruction. The word is zero-initialised so that a CMD which
// emits nothing fails the comparison deterministically instead of reading an
// indeterminate value.
#define T(HEX, CMD)                        \
    TEST_CASE(#CMD)                        \
    {                                      \
        using namespace oaknut;            \
        using namespace oaknut::util;      \
                                           \
        std::uint32_t result = 0;          \
        CodeGenerator code{&result};       \
                                           \
        code.CMD;                          \
                                           \
        REQUIRE(result == HEX);            \
    }
|
||||||
|
|
||||||
|
T(0x5ee0bb61, ABS(D1, D27))
|
||||||
|
T(0x4e20ba03, ABS(V3.B16(), V16.B16()))
|
||||||
|
T(0x5ef98449, ADD(D9, D2, D25))
|
||||||
|
T(0x4eef8697, ADD(V23.D2(), V20.D2(), V15.D2()))
|
||||||
|
T(0x0eb743d1, ADDHN(V17.S2(), V30.D2(), V23.D2()))
|
||||||
|
T(0x5ef1b933, ADDP(D19, V9.D2()))
|
||||||
|
T(0x0e7ebf6e, ADDP(V14.H4(), V27.H4(), V30.H4()))
|
||||||
|
T(0x4e31ba47, ADDV(B7, V18.B16()))
|
||||||
|
// AESD
|
||||||
|
// AESE
|
||||||
|
// AESIMC
|
||||||
|
// AESMC
|
||||||
|
T(0x4e2b1d4c, AND(V12.B16(), V10.B16(), V11.B16()))
|
||||||
|
T(0x6f01b7f4, BIC(V20.H8(), 63, LSL, 8))
|
||||||
|
T(0x2f017752, BIC(V18.S2(), 58, LSL, 24))
|
||||||
|
T(0x0e751c85, BIC(V5.B8(), V4.B8(), V21.B8()))
|
||||||
|
T(0x2ef11d4d, BIF(V13.B8(), V10.B8(), V17.B8()))
|
||||||
|
T(0x2eb31f3b, BIT(V27.B8(), V25.B8(), V19.B8()))
|
||||||
|
T(0x2e711ed8, BSL(V24.B8(), V22.B8(), V17.B8()))
|
||||||
|
T(0x0e604aaf, CLS(V15.H4(), V21.H4()))
|
||||||
|
T(0x6e604808, CLZ(V8.H8(), V0.H8()))
|
||||||
|
T(0x7eff8ec2, CMEQ(D2, D22, D31))
|
||||||
|
T(0x2e2b8d57, CMEQ(V23.B8(), V10.B8(), V11.B8()))
|
||||||
|
T(0x5ee09bbf, CMEQ(D31, D29, 0))
|
||||||
|
T(0x4ea09876, CMEQ(V22.S4(), V3.S4(), 0))
|
||||||
|
T(0x5ef23c04, CMGE(D4, D0, D18))
|
||||||
|
T(0x4e203c6f, CMGE(V15.B16(), V3.B16(), V0.B16()))
|
||||||
|
T(0x7ee08822, CMGE(D2, D1, 0))
|
||||||
|
T(0x2ea08bb9, CMGE(V25.S2(), V29.S2(), 0))
|
||||||
|
T(0x5ef036a5, CMGT(D5, D21, D16))
|
||||||
|
T(0x0eb7358b, CMGT(V11.S2(), V12.S2(), V23.S2()))
|
||||||
|
T(0x5ee08957, CMGT(D23, D10, 0))
|
||||||
|
T(0x4ea088eb, CMGT(V11.S4(), V7.S4(), 0))
|
||||||
|
T(0x7ee235f2, CMHI(D18, D15, D2))
|
||||||
|
T(0x6e243596, CMHI(V22.B16(), V12.B16(), V4.B16()))
|
||||||
|
T(0x7ef23faf, CMHS(D15, D29, D18))
|
||||||
|
T(0x2e2d3d8a, CMHS(V10.B8(), V12.B8(), V13.B8()))
|
||||||
|
T(0x7ee098a4, CMLE(D4, D5, 0))
|
||||||
|
T(0x2e2098d3, CMLE(V19.B8(), V6.B8(), 0))
|
||||||
|
T(0x5ee0a980, CMLT(D0, D12, 0))
|
||||||
|
T(0x4e60a892, CMLT(V18.H8(), V4.H8(), 0))
|
||||||
|
T(0x5ee18e03, CMTST(D3, D16, D1))
|
||||||
|
T(0x4e708f65, CMTST(V5.H8(), V27.H8(), V16.H8()))
|
||||||
|
T(0x4e20598d, CNT(V13.B16(), V12.B16()))
|
||||||
|
// DUP
|
||||||
|
T(0x0e0d06cd, DUP(V13.B8(), V22.B()[6]))
|
||||||
|
T(0x0e010fe7, DUP(V7.B8(), WZR))
|
||||||
|
T(0x2e2b1e6e, EOR(V14.B8(), V19.B8(), V11.B8()))
|
||||||
|
T(0x6e1c0a35, EXT(V21.B16(), V17.B16(), V28.B16(), 1))
|
||||||
|
T(0x7ea0d7a4, FABD(S4, S29, S0))
|
||||||
|
T(0x6eecd418, FABD(V24.D2(), V0.D2(), V12.D2()))
|
||||||
|
T(0x1e20c299, FABS(S25, S20))
|
||||||
|
T(0x1e60c114, FABS(D20, D8))
|
||||||
|
T(0x4ee0f999, FABS(V25.D2(), V12.D2()))
|
||||||
|
T(0x7e71ef5b, FACGE(D27, D26, D17))
|
||||||
|
T(0x6e6eed17, FACGE(V23.D2(), V8.D2(), V14.D2()))
|
||||||
|
T(0x7ef8efc0, FACGT(D0, D30, D24))
|
||||||
|
T(0x6eb0ec54, FACGT(V20.S4(), V2.S4(), V16.S4()))
|
||||||
|
T(0x1e242b23, FADD(S3, S25, S4))
|
||||||
|
T(0x1e672a8b, FADD(D11, D20, D7))
|
||||||
|
T(0x4e34d46b, FADD(V11.S4(), V3.S4(), V20.S4()))
|
||||||
|
T(0x7e30db16, FADDP(S22, V24.S2()))
|
||||||
|
T(0x6e21d626, FADDP(V6.S4(), V17.S4(), V1.S4()))
|
||||||
|
T(0x1e23c4ce, FCCMP(S6, S3, 14, GT))
|
||||||
|
T(0x1e7104aa, FCCMP(D5, D17, 10, EQ))
|
||||||
|
T(0x1e28c678, FCCMPE(S19, S8, 8, GT))
|
||||||
|
T(0x1e6195dd, FCCMPE(D14, D1, 13, LS))
|
||||||
|
T(0x5e31e659, FCMEQ(S25, S18, S17))
|
||||||
|
T(0x0e27e7ba, FCMEQ(V26.S2(), V29.S2(), V7.S2()))
|
||||||
|
T(0x5ea0da1d, FCMEQ(S29, S16, 0.0))
|
||||||
|
T(0x4ee0db8a, FCMEQ(V10.D2(), V28.D2(), 0.0))
|
||||||
|
T(0x7e2de473, FCMGE(S19, S3, S13))
|
||||||
|
T(0x2e33e726, FCMGE(V6.S2(), V25.S2(), V19.S2()))
|
||||||
|
T(0x7ea0c8d3, FCMGE(S19, S6, 0.0))
|
||||||
|
T(0x6ea0ca7b, FCMGE(V27.S4(), V19.S4(), 0.0))
|
||||||
|
T(0x7eb7e65d, FCMGT(S29, S18, S23))
|
||||||
|
T(0x6ef0e6ac, FCMGT(V12.D2(), V21.D2(), V16.D2()))
|
||||||
|
T(0x5ee0cb5a, FCMGT(D26, D26, 0.0))
|
||||||
|
T(0x4ea0c917, FCMGT(V23.S4(), V8.S4(), 0.0))
|
||||||
|
T(0x7ea0dbe1, FCMLE(S1, S31, 0.0))
|
||||||
|
T(0x6ea0da69, FCMLE(V9.S4(), V19.S4(), 0.0))
|
||||||
|
T(0x5ea0ea5f, FCMLT(S31, S18, 0.0))
|
||||||
|
T(0x4ee0e8de, FCMLT(V30.D2(), V6.D2(), 0.0))
|
||||||
|
T(0x1e322040, FCMP(S2, S18))
|
||||||
|
T(0x1e202248, FCMP(S18, 0.0))
|
||||||
|
T(0x1e6520a0, FCMP(D5, D5))
|
||||||
|
T(0x1e602108, FCMP(D8, 0.0))
|
||||||
|
T(0x1e332370, FCMPE(S27, S19))
|
||||||
|
T(0x1e202018, FCMPE(S0, 0.0))
|
||||||
|
T(0x1e7120b0, FCMPE(D5, D17))
|
||||||
|
T(0x1e602298, FCMPE(D20, 0.0))
|
||||||
|
T(0x1e32ed68, FCSEL(S8, S11, S18, AL))
|
||||||
|
T(0x1e7b1e21, FCSEL(D1, D17, D27, NE))
|
||||||
|
T(0x1ee24022, FCVT(S2, H1))
|
||||||
|
T(0x1ee2c33c, FCVT(D28, H25))
|
||||||
|
T(0x1e23c379, FCVT(H25, S27))
|
||||||
|
T(0x1e22c1a5, FCVT(D5, S13))
|
||||||
|
T(0x1e63c2b1, FCVT(H17, D21))
|
||||||
|
T(0x1e624309, FCVT(S9, D24))
|
||||||
|
T(0x1e2400a0, FCVTAS(W0, S5))
|
||||||
|
T(0x9e24000e, FCVTAS(X14, S0))
|
||||||
|
T(0x1e640191, FCVTAS(W17, D12))
|
||||||
|
T(0x9e6403d6, FCVTAS(X22, D30))
|
||||||
|
T(0x5e21c8a7, FCVTAS(S7, S5))
|
||||||
|
T(0x0e21c8df, FCVTAS(V31.S2(), V6.S2()))
|
||||||
|
T(0x1e25036b, FCVTAU(W11, S27))
|
||||||
|
T(0x9e25030c, FCVTAU(X12, S24))
|
||||||
|
T(0x1e65002e, FCVTAU(W14, D1))
|
||||||
|
T(0x9e65003e, FCVTAU(X30, D1))
|
||||||
|
T(0x7e61cabd, FCVTAU(D29, D21))
|
||||||
|
T(0x2e21c880, FCVTAU(V0.S2(), V4.S2()))
|
||||||
|
T(0x4e217b66, FCVTL2(V6.S4(), V27.H8()))
|
||||||
|
T(0x1e30016d, FCVTMS(W13, S11))
|
||||||
|
T(0x9e3002b5, FCVTMS(X21, S21))
|
||||||
|
T(0x1e7003dd, FCVTMS(W29, D30))
|
||||||
|
T(0x9e700080, FCVTMS(X0, D4))
|
||||||
|
T(0x5e21b9b6, FCVTMS(S22, S13))
|
||||||
|
T(0x4e61ba4e, FCVTMS(V14.D2(), V18.D2()))
|
||||||
|
T(0x1e31002d, FCVTMU(W13, S1))
|
||||||
|
T(0x9e310281, FCVTMU(X1, S20))
|
||||||
|
T(0x1e71000e, FCVTMU(W14, D0))
|
||||||
|
T(0x9e710010, FCVTMU(X16, D0))
|
||||||
|
T(0x7e61bb3b, FCVTMU(D27, D25))
|
||||||
|
T(0x2e21b918, FCVTMU(V24.S2(), V8.S2()))
|
||||||
|
T(0x0e616a68, FCVTN(V8.S2(), V19.D2()))
|
||||||
|
T(0x1e200100, FCVTNS(W0, S8))
|
||||||
|
T(0x9e20037f, FCVTNS(XZR, S27))
|
||||||
|
T(0x1e60015e, FCVTNS(W30, D10))
|
||||||
|
T(0x9e600018, FCVTNS(X24, D0))
|
||||||
|
T(0x5e61a846, FCVTNS(D6, D2))
|
||||||
|
T(0x4e21aa81, FCVTNS(V1.S4(), V20.S4()))
|
||||||
|
T(0x1e210248, FCVTNU(W8, S18))
|
||||||
|
T(0x9e2103da, FCVTNU(X26, S30))
|
||||||
|
T(0x1e610120, FCVTNU(W0, D9))
|
||||||
|
T(0x9e61013a, FCVTNU(X26, D9))
|
||||||
|
T(0x7e61aaba, FCVTNU(D26, D21))
|
||||||
|
T(0x6e21aa16, FCVTNU(V22.S4(), V16.S4()))
|
||||||
|
T(0x1e28010d, FCVTPS(W13, S8))
|
||||||
|
T(0x9e2803df, FCVTPS(XZR, S30))
|
||||||
|
T(0x1e6802e9, FCVTPS(W9, D23))
|
||||||
|
T(0x9e6801f7, FCVTPS(X23, D15))
|
||||||
|
T(0x5ee1a986, FCVTPS(D6, D12))
|
||||||
|
T(0x4ea1aa32, FCVTPS(V18.S4(), V17.S4()))
|
||||||
|
T(0x1e29022b, FCVTPU(W11, S17))
|
||||||
|
T(0x9e290381, FCVTPU(X1, S28))
|
||||||
|
T(0x1e690095, FCVTPU(W21, D4))
|
||||||
|
T(0x9e6902b3, FCVTPU(X19, D21))
|
||||||
|
T(0x7ea1abbb, FCVTPU(S27, S29))
|
||||||
|
T(0x6ee1ab06, FCVTPU(V6.D2(), V24.D2()))
|
||||||
|
T(0x7e61687a, FCVTXN(S26, D3))
|
||||||
|
T(0x2e61694c, FCVTXN(V12.S2(), V10.D2()))
|
||||||
|
T(0x1e18c4d6, FCVTZS(W22, S6, 15))
|
||||||
|
T(0x9e18d131, FCVTZS(X17, S9, 12))
|
||||||
|
T(0x1e58fd9b, FCVTZS(W27, D12, 1))
|
||||||
|
T(0x9e5899ee, FCVTZS(X14, D15, 26))
|
||||||
|
T(0x1e380091, FCVTZS(W17, S4))
|
||||||
|
T(0x9e380289, FCVTZS(X9, S20))
|
||||||
|
T(0x1e780117, FCVTZS(W23, D8))
|
||||||
|
T(0x9e7800f5, FCVTZS(X21, D7))
|
||||||
|
T(0x5f2fffdb, FCVTZS(S27, S30, 17))
|
||||||
|
T(0x4f65ff65, FCVTZS(V5.D2(), V27.D2(), 27))
|
||||||
|
T(0x5ee1b932, FCVTZS(D18, D9))
|
||||||
|
T(0x4ee1ba41, FCVTZS(V1.D2(), V18.D2()))
|
||||||
|
T(0x1e19b5d8, FCVTZU(W24, S14, 19))
|
||||||
|
T(0x9e199462, FCVTZU(X2, S3, 27))
|
||||||
|
T(0x1e59fca1, FCVTZU(W1, D5, 1))
|
||||||
|
T(0x9e599bbd, FCVTZU(X29, D29, 26))
|
||||||
|
T(0x1e3900f6, FCVTZU(W22, S7))
|
||||||
|
T(0x9e3900b7, FCVTZU(X23, S5))
|
||||||
|
T(0x1e79031a, FCVTZU(W26, D24))
|
||||||
|
T(0x9e790248, FCVTZU(X8, D18))
|
||||||
|
T(0x7f5afd37, FCVTZU(D23, D9, 38))
|
||||||
|
T(0x2f34fd38, FCVTZU(V24.S2(), V9.S2(), 12))
|
||||||
|
T(0x7ea1baa3, FCVTZU(S3, S21))
|
||||||
|
T(0x6ee1b8c1, FCVTZU(V1.D2(), V6.D2()))
|
||||||
|
T(0x1e3d1999, FDIV(S25, S12, S29))
|
||||||
|
T(0x1e7e1a4e, FDIV(D14, D18, D30))
|
||||||
|
T(0x2e2cfe45, FDIV(V5.S2(), V18.S2(), V12.S2()))
|
||||||
|
T(0x1f114362, FMADD(S2, S27, S17, S16))
|
||||||
|
T(0x1f482240, FMADD(D0, D18, D8, D8))
|
||||||
|
T(0x1e234b5f, FMAX(S31, S26, S3))
|
||||||
|
T(0x1e694894, FMAX(D20, D4, D9))
|
||||||
|
T(0x4e29f568, FMAX(V8.S4(), V11.S4(), V9.S4()))
|
||||||
|
T(0x1e2f6a40, FMAXNM(S0, S18, S15))
|
||||||
|
T(0x1e6d6a99, FMAXNM(D25, D20, D13))
|
||||||
|
T(0x4e2dc6da, FMAXNM(V26.S4(), V22.S4(), V13.S4()))
|
||||||
|
T(0x7e30c9b9, FMAXNMP(S25, V13.S2()))
|
||||||
|
T(0x6e36c794, FMAXNMP(V20.S4(), V28.S4(), V22.S4()))
|
||||||
|
T(0x6e30c8f6, FMAXNMV(S22, V7.S4()))
|
||||||
|
T(0x7e30f8dd, FMAXP(S29, V6.S2()))
|
||||||
|
T(0x6e61f4ab, FMAXP(V11.D2(), V5.D2(), V1.D2()))
|
||||||
|
T(0x6e30fb85, FMAXV(S5, V28.S4()))
|
||||||
|
T(0x1e3c5aae, FMIN(S14, S21, S28))
|
||||||
|
T(0x1e7f58f8, FMIN(D24, D7, D31))
|
||||||
|
T(0x0eb0f63b, FMIN(V27.S2(), V17.S2(), V16.S2()))
|
||||||
|
T(0x1e317886, FMINNM(S6, S4, S17))
|
||||||
|
T(0x1e6e7a5d, FMINNM(D29, D18, D14))
|
||||||
|
T(0x4ea4c44c, FMINNM(V12.S4(), V2.S4(), V4.S4()))
|
||||||
|
T(0x7ef0c895, FMINNMP(D21, V4.D2()))
|
||||||
|
T(0x6efbc4e3, FMINNMP(V3.D2(), V7.D2(), V27.D2()))
|
||||||
|
T(0x6eb0c93d, FMINNMV(S29, V9.S4()))
|
||||||
|
T(0x7ef0fa13, FMINP(D19, V16.D2()))
|
||||||
|
T(0x2eb4f4ac, FMINP(V12.S2(), V5.S2(), V20.S2()))
|
||||||
|
T(0x6eb0f801, FMINV(S1, V0.S4()))
|
||||||
|
T(0x5f8219a6, FMLA(S6, S13, V2.S()[2]))
|
||||||
|
T(0x4fc512a1, FMLA(V1.D2(), V21.D2(), V5.D()[0]))
|
||||||
|
T(0x4e6bcecf, FMLA(V15.D2(), V22.D2(), V11.D2()))
|
||||||
|
T(0x5f8a5094, FMLS(S20, S4, V10.S()[0]))
|
||||||
|
T(0x4fd85b79, FMLS(V25.D2(), V27.D2(), V24.D()[1]))
|
||||||
|
T(0x0ebacca4, FMLS(V4.S2(), V5.S2(), V26.S2()))
|
||||||
|
T(0x1e270027, FMOV(S7, W1))
|
||||||
|
T(0x1e260164, FMOV(W4, S11))
|
||||||
|
T(0x9e670008, FMOV(D8, X0))
|
||||||
|
T(0x9eaf03e0, FMOV(V0.D()[1], XZR))
|
||||||
|
T(0x9e660090, FMOV(X16, D4))
|
||||||
|
T(0x9eae025f, FMOV(XZR, V18.D()[1]))
|
||||||
|
T(0x1e204079, FMOV(S25, S3))
|
||||||
|
T(0x1e6042f8, FMOV(D24, D23))
|
||||||
|
T(0x1e32f01c, FMOV(S28, FImm8{true, 0b001, 0b0111})) // -5.75
|
||||||
|
T(0x1e74901e, FMOV(D30, FImm8{true, 0b010, 0b0100})) // -10.0
|
||||||
|
T(0x0f03f51a, FMOV(V26.S2(), FImm8{false, 0b110, 0b1000})) // 0.75
|
||||||
|
T(0x6f02f58e, FMOV(V14.D2(), FImm8{false, 0b100, 0b1100})) // 0.21875
|
||||||
|
T(0x1f0adaf5, FMSUB(S21, S23, S10, S22))
|
||||||
|
T(0x1f5da840, FMSUB(D0, D2, D29, D10))
|
||||||
|
T(0x5fa39bba, FMUL(S26, S29, V3.S()[3]))
|
||||||
|
T(0x4fb89ad2, FMUL(V18.S4(), V22.S4(), V24.S()[3]))
|
||||||
|
T(0x1e2b0a3c, FMUL(S28, S17, S11))
|
||||||
|
T(0x1e720933, FMUL(D19, D9, D18))
|
||||||
|
T(0x6e7edfa3, FMUL(V3.D2(), V29.D2(), V30.D2()))
|
||||||
|
T(0x5e32dee6, FMULX(S6, S23, S18))
|
||||||
|
T(0x0e27deec, FMULX(V12.S2(), V23.S2(), V7.S2()))
|
||||||
|
T(0x7f879a1f, FMULX(S31, S16, V7.S()[2]))
|
||||||
|
T(0x6fce9836, FMULX(V22.D2(), V1.D2(), V14.D()[1]))
|
||||||
|
T(0x1e2142cc, FNEG(S12, S22))
|
||||||
|
T(0x1e61434b, FNEG(D11, D26))
|
||||||
|
T(0x6ea0fb90, FNEG(V16.S4(), V28.S4()))
|
||||||
|
T(0x1f361be5, FNMADD(S5, S31, S22, S6))
|
||||||
|
T(0x1f7a316d, FNMADD(D13, D11, D26, D12))
|
||||||
|
T(0x1f3e9957, FNMSUB(S23, S10, S30, S6))
|
||||||
|
T(0x1f79da66, FNMSUB(D6, D19, D25, D22))
|
||||||
|
T(0x1e208ab5, FNMUL(S21, S21, S0))
|
||||||
|
T(0x1e6f89eb, FNMUL(D11, D15, D15))
|
||||||
|
T(0x5ea1da18, FRECPE(S24, S16))
|
||||||
|
T(0x0ea1d9df, FRECPE(V31.S2(), V14.S2()))
|
||||||
|
T(0x5e2dfe37, FRECPS(S23, S17, S13))
|
||||||
|
T(0x0e29fcec, FRECPS(V12.S2(), V7.S2(), V9.S2()))
|
||||||
|
T(0x5ee1f998, FRECPX(D24, D12))
|
||||||
|
T(0x1e264106, FRINTA(S6, S8))
|
||||||
|
T(0x1e664376, FRINTA(D22, D27))
|
||||||
|
T(0x6e6188a9, FRINTA(V9.D2(), V5.D2()))
|
||||||
|
T(0x1e27c216, FRINTI(S22, S16))
|
||||||
|
T(0x1e67c071, FRINTI(D17, D3))
|
||||||
|
T(0x6ea19b9d, FRINTI(V29.S4(), V28.S4()))
|
||||||
|
T(0x1e25413e, FRINTM(S30, S9))
|
||||||
|
T(0x1e6541a1, FRINTM(D1, D13))
|
||||||
|
T(0x4e619ad8, FRINTM(V24.D2(), V22.D2()))
|
||||||
|
T(0x1e244098, FRINTN(S24, S4))
|
||||||
|
T(0x1e6440b4, FRINTN(D20, D5))
|
||||||
|
T(0x4e618835, FRINTN(V21.D2(), V1.D2()))
|
||||||
|
T(0x1e24c188, FRINTP(S8, S12))
|
||||||
|
T(0x1e64c292, FRINTP(D18, D20))
|
||||||
|
T(0x0ea18a69, FRINTP(V9.S2(), V19.S2()))
|
||||||
|
T(0x1e274146, FRINTX(S6, S10))
|
||||||
|
T(0x1e674333, FRINTX(D19, D25))
|
||||||
|
T(0x6e619902, FRINTX(V2.D2(), V8.D2()))
|
||||||
|
T(0x1e25c2b2, FRINTZ(S18, S21))
|
||||||
|
T(0x1e65c008, FRINTZ(D8, D0))
|
||||||
|
T(0x0ea19918, FRINTZ(V24.S2(), V8.S2()))
|
||||||
|
T(0x7ea1dbdb, FRSQRTE(S27, S30))
|
||||||
|
T(0x6ee1d8df, FRSQRTE(V31.D2(), V6.D2()))
|
||||||
|
T(0x5ee0ff40, FRSQRTS(D0, D26, D0))
|
||||||
|
T(0x4eb6fe31, FRSQRTS(V17.S4(), V17.S4(), V22.S4()))
|
||||||
|
T(0x1e21c204, FSQRT(S4, S16))
|
||||||
|
T(0x1e61c31c, FSQRT(D28, D24))
|
||||||
|
T(0x6ea1fa1f, FSQRT(V31.S4(), V16.S4()))
|
||||||
|
T(0x1e273b28, FSUB(S8, S25, S7))
|
||||||
|
T(0x1e6139b9, FSUB(D25, D13, D1))
|
||||||
|
T(0x0eadd6b0, FSUB(V16.S2(), V21.S2(), V13.S2()))
|
||||||
|
// INS
|
||||||
|
// INS
|
||||||
|
T(0x0c407b24, LD1(List{V4.S2()}, X25))
|
||||||
|
T(0x4c40a891, LD1(List{V17.S4(), V18.S4()}, X4))
|
||||||
|
T(0x0c406d31, LD1(List{V17.D1(), V18.D1(), V19.D1()}, X9))
|
||||||
|
T(0x4c402b00, LD1(List{V0.S4(), V1.S4(), V2.S4(), V3.S4()}, X24))
|
||||||
|
T(0x4cdf72c8, LD1(List{V8.B16()}, X22, POST_INDEXED, 16))
|
||||||
|
T(0x0cd67504, LD1(List{V4.H4()}, X8, POST_INDEXED, X22))
|
||||||
|
T(0x0cdfaeb7, LD1(List{V23.D1(), V24.D1()}, X21, POST_INDEXED, 16))
|
||||||
|
T(0x0cd0a837, LD1(List{V23.S2(), V24.S2()}, X1, POST_INDEXED, X16))
|
||||||
|
T(0x4cdf6d36, LD1(List{V22.D2(), V23.D2(), V24.D2()}, X9, POST_INDEXED, 48))
|
||||||
|
T(0x0cdc685b, LD1(List{V27.S2(), V28.S2(), V29.S2()}, X2, POST_INDEXED, X28))
|
||||||
|
T(0x0cdf2ebc, LD1(List{V28.D1(), V29.D1(), V30.D1(), V31.D1()}, X21, POST_INDEXED, 32))
|
||||||
|
T(0x0cc0260c, LD1(List{V12.H4(), V13.H4(), V14.H4(), V15.H4()}, X16, POST_INDEXED, X0))
|
||||||
|
T(0x0d400665, LD1(List{V5.B()}[1], X19))
|
||||||
|
T(0x0d4041da, LD1(List{V26.H()}[0], X14))
|
||||||
|
T(0x0d40815b, LD1(List{V27.S()}[0], X10))
|
||||||
|
T(0x0d408755, LD1(List{V21.D()}[0], X26))
|
||||||
|
T(0x4ddf0966, LD1(List{V6.B()}[10], X11, POST_INDEXED, 1))
|
||||||
|
T(0x4dcc1951, LD1(List{V17.B()}[14], X10, POST_INDEXED, X12))
|
||||||
|
T(0x0ddf58cf, LD1(List{V15.H()}[3], X6, POST_INDEXED, 2))
|
||||||
|
T(0x0dd14a3d, LD1(List{V29.H()}[1], X17, POST_INDEXED, X17))
|
||||||
|
T(0x0ddf8072, LD1(List{V18.S()}[0], X3, POST_INDEXED, 4))
|
||||||
|
T(0x4dcb90bb, LD1(List{V27.S()}[3], X5, POST_INDEXED, X11))
|
||||||
|
T(0x4ddf8537, LD1(List{V23.D()}[1], X9, POST_INDEXED, 8))
|
||||||
|
T(0x0dcf8784, LD1(List{V4.D()}[0], X28, POST_INDEXED, X15))
|
||||||
|
T(0x0d40c0f1, LD1R(List{V17.B8()}, X7))
|
||||||
|
T(0x0ddfceac, LD1R(List{V12.D1()}, X21, POST_INDEXED, 8))
|
||||||
|
T(0x4dd5c9c2, LD1R(List{V2.S4()}, X14, POST_INDEXED, X21))
|
||||||
|
T(0x0c408bc8, LD2(List{V8.S2(), V9.S2()}, X30))
|
||||||
|
T(0x0cdf842a, LD2(List{V10.H4(), V11.H4()}, X1, POST_INDEXED, 16))
|
||||||
|
T(0x0cd58678, LD2(List{V24.H4(), V25.H4()}, X19, POST_INDEXED, X21))
|
||||||
|
T(0x0d60132f, LD2(List{V15.B(), V16.B()}[4], X25))
|
||||||
|
T(0x4d605156, LD2(List{V22.H(), V23.H()}[6], X10))
|
||||||
|
T(0x0d609293, LD2(List{V19.S(), V20.S()}[1], X20))
|
||||||
|
T(0x4d608599, LD2(List{V25.D(), V26.D()}[1], X12))
|
||||||
|
T(0x4dff0bd6, LD2(List{V22.B(), V23.B()}[10], X30, POST_INDEXED, 2))
|
||||||
|
T(0x0df90bab, LD2(List{V11.B(), V12.B()}[2], X29, POST_INDEXED, X25))
|
||||||
|
T(0x4dff42c3, LD2(List{V3.H(), V4.H()}[4], X22, POST_INDEXED, 4))
|
||||||
|
T(0x4dfa5816, LD2(List{V22.H(), V23.H()}[7], X0, POST_INDEXED, X26))
|
||||||
|
T(0x4dff9372, LD2(List{V18.S(), V19.S()}[3], X27, POST_INDEXED, 8))
|
||||||
|
T(0x4de483c0, LD2(List{V0.S(), V1.S()}[2], X30, POST_INDEXED, X4))
|
||||||
|
T(0x4dff8714, LD2(List{V20.D(), V21.D()}[1], X24, POST_INDEXED, 16))
|
||||||
|
T(0x4dfa854d, LD2(List{V13.D(), V14.D()}[1], X10, POST_INDEXED, X26))
|
||||||
|
T(0x4d60ca33, LD2R(List{V19.S4(), V20.S4()}, X17))
|
||||||
|
T(0x0dffc777, LD2R(List{V23.H4(), V24.H4()}, X27, POST_INDEXED, 4))
|
||||||
|
T(0x4de9c3cd, LD2R(List{V13.B16(), V14.B16()}, X30, POST_INDEXED, X9))
|
||||||
|
T(0x0c404032, LD3(List{V18.B8(), V19.B8(), V20.B8()}, X1))
|
||||||
|
T(0x0cdf4bc8, LD3(List{V8.S2(), V9.S2(), V10.S2()}, X30, POST_INDEXED, 24))
|
||||||
|
T(0x4ccb4960, LD3(List{V0.S4(), V1.S4(), V2.S4()}, X11, POST_INDEXED, X11))
|
||||||
|
T(0x0d40217c, LD3(List{V28.B(), V29.B(), V30.B()}[0], X11))
|
||||||
|
T(0x4d407a38, LD3(List{V24.H(), V25.H(), V26.H()}[7], X17))
|
||||||
|
T(0x4d40a119, LD3(List{V25.S(), V26.S(), V27.S()}[2], X8))
|
||||||
|
T(0x0d40a6bb, LD3(List{V27.D(), V28.D(), V29.D()}[0], X21))
|
||||||
|
T(0x4ddf2bb1, LD3(List{V17.B(), V18.B(), V19.B()}[10], X29, POST_INDEXED, 3))
|
||||||
|
T(0x4dc13519, LD3(List{V25.B(), V26.B(), V27.B()}[13], X8, POST_INDEXED, X1))
|
||||||
|
T(0x4ddf6b3f, LD3(List{V31.H(), V0.H(), V1.H()}[5], X25, POST_INDEXED, 6))
|
||||||
|
T(0x4dc16243, LD3(List{V3.H(), V4.H(), V5.H()}[4], X18, POST_INDEXED, X1))
|
||||||
|
T(0x4ddfa329, LD3(List{V9.S(), V10.S(), V11.S()}[2], X25, POST_INDEXED, 12))
|
||||||
|
T(0x4ddab328, LD3(List{V8.S(), V9.S(), V10.S()}[3], X25, POST_INDEXED, X26))
|
||||||
|
T(0x4ddfa4e4, LD3(List{V4.D(), V5.D(), V6.D()}[1], X7, POST_INDEXED, 24))
|
||||||
|
T(0x0ddba58c, LD3(List{V12.D(), V13.D(), V14.D()}[0], X12, POST_INDEXED, X27))
|
||||||
|
T(0x0d40e3b3, LD3R(List{V19.B8(), V20.B8(), V21.B8()}, X29))
|
||||||
|
T(0x0ddfe2f3, LD3R(List{V19.B8(), V20.B8(), V21.B8()}, X23, POST_INDEXED, 3))
|
||||||
|
T(0x0ddbe8e4, LD3R(List{V4.S2(), V5.S2(), V6.S2()}, X7, POST_INDEXED, X27))
|
||||||
|
T(0x4c400a69, LD4(List{V9.S4(), V10.S4(), V11.S4(), V12.S4()}, X19))
|
||||||
|
T(0x0cdf0bea, LD4(List{V10.S2(), V11.S2(), V12.S2(), V13.S2()}, SP, POST_INDEXED, 32))
|
||||||
|
T(0x4cd705ad, LD4(List{V13.H8(), V14.H8(), V15.H8(), V16.H8()}, X13, POST_INDEXED, X23))
|
||||||
|
T(0x0d603b97, LD4(List{V23.B(), V24.B(), V25.B(), V26.B()}[6], X28))
|
||||||
|
T(0x0d606941, LD4(List{V1.H(), V2.H(), V3.H(), V4.H()}[1], X10))
|
||||||
|
T(0x0d60a039, LD4(List{V25.S(), V26.S(), V27.S(), V28.S()}[0], X1))
|
||||||
|
T(0x4d60a4c5, LD4(List{V5.D(), V6.D(), V7.D(), V8.D()}[1], X6))
|
||||||
|
T(0x0dff2139, LD4(List{V25.B(), V26.B(), V27.B(), V28.B()}[0], X9, POST_INDEXED, 4))
|
||||||
|
T(0x4df32513, LD4(List{V19.B(), V20.B(), V21.B(), V22.B()}[9], X8, POST_INDEXED, X19))
|
||||||
|
T(0x0dff7b45, LD4(List{V5.H(), V6.H(), V7.H(), V8.H()}[3], X26, POST_INDEXED, 8))
|
||||||
|
T(0x0dfa6839, LD4(List{V25.H(), V26.H(), V27.H(), V28.H()}[1], X1, POST_INDEXED, X26))
|
||||||
|
T(0x4dffa176, LD4(List{V22.S(), V23.S(), V24.S(), V25.S()}[2], X11, POST_INDEXED, 16))
|
||||||
|
T(0x4de0a125, LD4(List{V5.S(), V6.S(), V7.S(), V8.S()}[2], X9, POST_INDEXED, X0))
|
||||||
|
T(0x0dffa4ab, LD4(List{V11.D(), V12.D(), V13.D(), V14.D()}[0], X5, POST_INDEXED, 32))
|
||||||
|
T(0x0dfba784, LD4(List{V4.D(), V5.D(), V6.D(), V7.D()}[0], X28, POST_INDEXED, X27))
|
||||||
|
T(0x4d60ef82, LD4R(List{V2.D2(), V3.D2(), V4.D2(), V5.D2()}, X28))
|
||||||
|
T(0x0dffef23, LD4R(List{V3.D1(), V4.D1(), V5.D1(), V6.D1()}, X25, POST_INDEXED, 32))
|
||||||
|
T(0x4df5e36a, LD4R(List{V10.B16(), V11.B16(), V12.B16(), V13.B16()}, X27, POST_INDEXED, X21))
|
||||||
|
T(0x2c6dde58, LDNP(S24, S23, X18, -148))
|
||||||
|
T(0x6c5f8ad5, LDNP(D21, D2, X22, 504))
|
||||||
|
T(0xac793251, LDNP(Q17, Q12, X18, -224))
|
||||||
|
T(0x2cf1b345, LDP(S5, S12, X26, POST_INDEXED, -116))
|
||||||
|
T(0x6cc9489a, LDP(D26, D18, X4, POST_INDEXED, 144))
|
||||||
|
T(0xace34b69, LDP(Q9, Q18, X27, POST_INDEXED, -928))
|
||||||
|
T(0x2dca159f, LDP(S31, S5, X12, PRE_INDEXED, 80))
|
||||||
|
T(0x6df9682d, LDP(D13, D26, X1, PRE_INDEXED, -112))
|
||||||
|
T(0xadc7566f, LDP(Q15, Q21, X19, PRE_INDEXED, 224))
|
||||||
|
T(0x2d4efb01, LDP(S1, S30, X24, 116))
|
||||||
|
T(0x6d710b5a, LDP(D26, D2, X26, -240))
|
||||||
|
T(0xad74fbb0, LDP(Q16, Q30, X29, -368))
|
||||||
|
T(0x3c5b76a9, LDR(B9, X21, POST_INDEXED, -73))
|
||||||
|
T(0x7c5fd798, LDR(H24, X28, POST_INDEXED, -3))
|
||||||
|
T(0xbc4336b6, LDR(S22, X21, POST_INDEXED, 51))
|
||||||
|
T(0xfc53b4d5, LDR(D21, X6, POST_INDEXED, -197))
|
||||||
|
T(0x3cdf571d, LDR(Q29, X24, POST_INDEXED, -11))
|
||||||
|
T(0x3c5baf77, LDR(B23, X27, PRE_INDEXED, -70))
|
||||||
|
T(0x7c41bc79, LDR(H25, X3, PRE_INDEXED, 27))
|
||||||
|
T(0xbc48ecb2, LDR(S18, X5, PRE_INDEXED, 142))
|
||||||
|
T(0xfc4b1dee, LDR(D14, X15, PRE_INDEXED, 177))
|
||||||
|
T(0x3cc31c6a, LDR(Q10, X3, PRE_INDEXED, 49))
|
||||||
|
T(0x3d5a0ef6, LDR(B22, X23, 1667))
|
||||||
|
T(0x7d5d8dd7, LDR(H23, X14, 3782))
|
||||||
|
T(0xbd55d41a, LDR(S26, X0, 5588))
|
||||||
|
T(0xfd58c566, LDR(D6, X11, 12680))
|
||||||
|
T(0x3dce966e, LDR(Q14, X19, 14928))
|
||||||
|
T(0x1c8599c0, LDR(S0, -1002696))
|
||||||
|
T(0x5c8a1ca4, LDR(D4, -965740))
|
||||||
|
T(0x9cfd90fa, LDR(Q26, -19940))
|
||||||
|
T(0x3c634a12, LDR(B18, X16, W3, UXTW))
|
||||||
|
T(0x3c7368e7, LDR(B7, X7, X19, LSL, 0))
|
||||||
|
T(0x7c646a38, LDR(H24, X17, X4))
|
||||||
|
T(0xbc727bda, LDR(S26, X30, X18, LSL, 2))
|
||||||
|
T(0xfc63eb36, LDR(D22, X25, X3, SXTX))
|
||||||
|
T(0x3ce2ca06, LDR(Q6, X16, W2, SXTW))
|
||||||
|
T(0x3c4233e6, LDUR(B6, SP, 35))
|
||||||
|
T(0x7c4d52f1, LDUR(H17, X23, 213))
|
||||||
|
T(0xbc5be12f, LDUR(S15, X9, -66))
|
||||||
|
T(0xfc474197, LDUR(D23, X12, 116))
|
||||||
|
T(0x3cd703db, LDUR(Q27, X30, -144))
|
||||||
|
T(0x2f9a0354, MLA(V20.S2(), V26.S2(), V26.S()[0]))
|
||||||
|
T(0x4e7e9643, MLA(V3.H8(), V18.H8(), V30.H8()))
|
||||||
|
T(0x2f80484e, MLS(V14.S2(), V2.S2(), V0.S()[2]))
|
||||||
|
T(0x6ebb9572, MLS(V18.S4(), V11.S4(), V27.S4()))
|
||||||
|
T(0x6e135ec1, MOV(V1.B()[9], V22.B()[11]))
|
||||||
|
T(0x4e0f1da9, MOV(V9.B()[7], W13))
|
||||||
|
T(0x5e0e045d, MOV(H29, V2.H()[3]))
|
||||||
|
T(0x0e043ca1, MOV(W1, V5.S()[0]))
|
||||||
|
T(0x4e083df7, MOV(X23, V15.D()[0]))
|
||||||
|
// MOV
|
||||||
|
T(0x0f06e58e, MOVI(V14.B8(), 204))
|
||||||
|
T(0x4f058559, MOVI(V25.H8(), 170))
|
||||||
|
T(0x0f030565, MOVI(V5.S2(), 107))
|
||||||
|
T(0x0f05c4dc, MOVI(V28.S2(), 166, MSL, 8))
|
||||||
|
T(0x2f07e47e, MOVI(D30, RepImm{0b11100011})) // 0xffffff000000ffff (each imm8 bit expands to one byte)
|
||||||
|
T(0x6f03e65b, MOVI(V27.D2(), RepImm{0b01110010})) // 0x00ffffff0000ff00 per lane (each imm8 bit expands to one byte)
|
||||||
|
T(0x0f9e813e, MUL(V30.S2(), V9.S2(), V30.S()[0]))
|
||||||
|
T(0x4ea59f8e, MUL(V14.S4(), V28.S4(), V5.S4()))
|
||||||
|
T(0x2e205acd, MVN(V13.B8(), V22.B8()))
|
||||||
|
T(0x2f0084e1, MVNI(V1.H4(), 7))
|
||||||
|
T(0x6f026602, MVNI(V2.S4(), 80, LSL, 24))
|
||||||
|
T(0x2f03c71a, MVNI(V26.S2(), 120, MSL, 8))
|
||||||
|
T(0x7ee0ba9e, NEG(D30, D20))
|
||||||
|
T(0x2ea0b9f7, NEG(V23.S2(), V15.S2()))
|
||||||
|
// NOT
|
||||||
|
T(0x4ef81f0f, ORN(V15.B16(), V24.B16(), V24.B16()))
|
||||||
|
T(0x4f03b4e0, ORR(V0.H8(), 103, LSL, 8))
|
||||||
|
T(0x4f043508, ORR(V8.S4(), 136, LSL, 8))
|
||||||
|
T(0x4eb21c9c, ORR(V28.B16(), V4.B16(), V18.B16()))
|
||||||
|
T(0x2e279d77, PMUL(V23.B8(), V11.B8(), V7.B8()))
|
||||||
|
T(0x4e27e299, PMULL2(V25.H8(), V20.B16(), V7.B16()))
|
||||||
|
T(0x2eab4048, RADDHN(V8.S2(), V2.D2(), V11.D2()))
|
||||||
|
T(0x6e605b7e, RBIT(V30.B16(), V27.B16()))
|
||||||
|
T(0x0e201b37, REV16(V23.B8(), V25.B8()))
|
||||||
|
T(0x6e60098a, REV32(V10.H8(), V12.H8()))
|
||||||
|
T(0x0e2009de, REV64(V30.B8(), V14.B8()))
|
||||||
|
T(0x4f218e4e, RSHRN2(V14.S4(), V18.D2(), 31))
|
||||||
|
T(0x6e7460f2, RSUBHN2(V18.H8(), V7.S4(), V20.S4()))
|
||||||
|
T(0x0e377f74, SABA(V20.B8(), V27.B8(), V23.B8()))
|
||||||
|
T(0x4ea851f6, SABAL2(V22.D2(), V15.S4(), V8.S4()))
|
||||||
|
T(0x0e777752, SABD(V18.H4(), V26.H4(), V23.H4()))
|
||||||
|
T(0x0eba7005, SABDL(V5.D2(), V0.S2(), V26.S2()))
|
||||||
|
T(0x4e2069c4, SADALP(V4.H8(), V14.B16()))
|
||||||
|
T(0x4e270017, SADDL2(V23.H8(), V0.B16(), V7.B16()))
|
||||||
|
T(0x0ea028ca, SADDLP(V10.D1(), V6.S2()))
|
||||||
|
T(0x4e703b2a, SADDLV(S10, V25.H8()))
|
||||||
|
T(0x0e6311d2, SADDW(V18.S4(), V14.S4(), V3.H4()))
|
||||||
|
T(0x1e02c782, SCVTF(S2, W28, 15))
|
||||||
|
T(0x1e42d0e2, SCVTF(D2, W7, 12))
|
||||||
|
T(0x9e02e80e, SCVTF(S14, X0, 6))
|
||||||
|
T(0x9e423dda, SCVTF(D26, X14, 49))
|
||||||
|
T(0x1e2202f3, SCVTF(S19, W23))
|
||||||
|
T(0x1e6201e7, SCVTF(D7, W15))
|
||||||
|
T(0x9e22016c, SCVTF(S12, X11))
|
||||||
|
T(0x9e620316, SCVTF(D22, X24))
|
||||||
|
T(0x5f34e509, SCVTF(S9, S8, 12))
|
||||||
|
T(0x4f5ae716, SCVTF(V22.D2(), V24.D2(), 38))
|
||||||
|
T(0x5e61d946, SCVTF(D6, D10))
|
||||||
|
T(0x4e61d86b, SCVTF(V11.D2(), V3.D2()))
|
||||||
|
// SHA1C
|
||||||
|
// SHA1H
|
||||||
|
// SHA1M
|
||||||
|
// SHA1P
|
||||||
|
// SHA1SU0
|
||||||
|
// SHA1SU1
|
||||||
|
// SHA256H
|
||||||
|
// SHA256H2
|
||||||
|
// SHA256SU0
|
||||||
|
// SHA256SU1
|
||||||
|
T(0x4eb90506, SHADD(V6.S4(), V8.S4(), V25.S4()))
|
||||||
|
T(0x5f4d5767, SHL(D7, D27, 13))
|
||||||
|
T(0x4f1f542f, SHL(V15.H8(), V1.H8(), 15))
|
||||||
|
T(0x2ea13a71, SHLL(V17.D2(), V19.S2(), 32))
|
||||||
|
T(0x4f0885fd, SHRN2(V29.B16(), V15.H8(), 8))
|
||||||
|
T(0x0eb42794, SHSUB(V20.S2(), V28.S2(), V20.S2()))
|
||||||
|
T(0x7f5f54ad, SLI(D13, D5, 31))
|
||||||
|
T(0x6f09554e, SLI(V14.B16(), V10.B16(), 1))
|
||||||
|
T(0x0e316452, SMAX(V18.B8(), V2.B8(), V17.B8()))
|
||||||
|
T(0x4e66a478, SMAXP(V24.H8(), V3.H8(), V6.H8()))
|
||||||
|
T(0x0e30a9e6, SMAXV(B6, V15.B8()))
|
||||||
|
T(0x4e276e2a, SMIN(V10.B16(), V17.B16(), V7.B16()))
|
||||||
|
T(0x4e29ad73, SMINP(V19.B16(), V11.B16(), V9.B16()))
|
||||||
|
T(0x0e71aac5, SMINV(H5, V22.H4()))
|
||||||
|
T(0x4f9f2b00, SMLAL2(V0.D2(), V24.S4(), V31.S()[2]))
|
||||||
|
T(0x4e788037, SMLAL2(V23.S4(), V1.H8(), V24.H8()))
|
||||||
|
T(0x4f7362b9, SMLSL2(V25.S4(), V21.H8(), V3.H()[3]))
|
||||||
|
T(0x0e31a0d5, SMLSL(V21.H8(), V6.B8(), V17.B8()))
|
||||||
|
T(0x0e162fc3, SMOV(W3, V30.H()[5]))
|
||||||
|
T(0x4e0a2cf2, SMOV(X18, V7.H()[2]))
|
||||||
|
T(0x0f6ba85c, SMULL(V28.S4(), V2.H4(), V11.H()[6]))
|
||||||
|
T(0x4e61c2a1, SMULL2(V1.S4(), V21.H8(), V1.H8()))
|
||||||
|
T(0x5e20794c, SQABS(B12, B10))
|
||||||
|
T(0x4e607b9b, SQABS(V27.H8(), V28.H8()))
|
||||||
|
T(0x5eb50df4, SQADD(S20, S15, S21))
|
||||||
|
T(0x0e370ff4, SQADD(V20.B8(), V31.B8(), V23.B8()))
|
||||||
|
T(0x5fab3a4e, SQDMLAL(D14, S18, V11.S()[3]))
|
||||||
|
T(0x4f5b3805, SQDMLAL2(V5.S4(), V0.H8(), V11.H()[5]))
|
||||||
|
T(0x5e7f90ed, SQDMLAL(S13, H7, H31))
|
||||||
|
T(0x0ea992b2, SQDMLAL(V18.D2(), V21.S2(), V9.S2()))
|
||||||
|
T(0x5f867ba2, SQDMLSL(D2, S29, V6.S()[2]))
|
||||||
|
T(0x4f997118, SQDMLSL2(V24.D2(), V8.S4(), V25.S()[0]))
|
||||||
|
T(0x5e62b0b2, SQDMLSL(S18, H5, H2))
|
||||||
|
T(0x0e74b089, SQDMLSL(V9.S4(), V4.H4(), V20.H4()))
|
||||||
|
T(0x5f5acb3c, SQDMULH(H28, H25, V10.H()[5]))
|
||||||
|
T(0x4f7bc13d, SQDMULH(V29.H8(), V9.H8(), V11.H()[3]))
|
||||||
|
T(0x5e6ab724, SQDMULH(H4, H25, H10))
|
||||||
|
T(0x4ea6b543, SQDMULH(V3.S4(), V10.S4(), V6.S4()))
|
||||||
|
T(0x5f89b899, SQDMULL(D25, S4, V9.S()[2]))
|
||||||
|
T(0x0f53b2ee, SQDMULL(V14.S4(), V23.H4(), V3.H()[1]))
|
||||||
|
T(0x5e60d01a, SQDMULL(S26, H0, H0))
|
||||||
|
T(0x0eb4d146, SQDMULL(V6.D2(), V10.S2(), V20.S2()))
|
||||||
|
T(0x7ee07b81, SQNEG(D1, D28))
|
||||||
|
T(0x2e607a04, SQNEG(V4.H4(), V16.H4()))
|
||||||
|
T(0x5f47dac8, SQRDMULH(H8, H22, V7.H()[4]))
|
||||||
|
T(0x0f45db93, SQRDMULH(V19.H4(), V28.H4(), V5.H()[4]))
|
||||||
|
T(0x7ea3b621, SQRDMULH(S1, S17, S3))
|
||||||
|
T(0x6ea2b672, SQRDMULH(V18.S4(), V19.S4(), V2.S4()))
|
||||||
|
T(0x5e7c5ee7, SQRSHL(H7, H23, H28))
|
||||||
|
T(0x4e655e4b, SQRSHL(V11.H8(), V18.H8(), V5.H8()))
|
||||||
|
T(0x5f0c9c10, SQRSHRN(B16, H0, 4))
|
||||||
|
T(0x4f309e99, SQRSHRN2(V25.S4(), V20.D2(), 16))
|
||||||
|
T(0x7f1f8de7, SQRSHRUN(H7, S15, 1))
|
||||||
|
T(0x6f178f67, SQRSHRUN2(V7.H8(), V27.S4(), 9))
|
||||||
|
T(0x5f7977b8, SQSHL(D24, D29, 57))
|
||||||
|
T(0x4f1e75f3, SQSHL(V19.H8(), V15.H8(), 14))
|
||||||
|
T(0x5eb24f5d, SQSHL(S29, S26, S18))
|
||||||
|
T(0x4e7c4c93, SQSHL(V19.H8(), V4.H8(), V28.H8()))
|
||||||
|
T(0x7f2e66a1, SQSHLU(S1, S21, 14))
|
||||||
|
T(0x6f4c65a2, SQSHLU(V2.D2(), V13.D2(), 12))
|
||||||
|
T(0x5f3f950b, SQSHRN(S11, D8, 1))
|
||||||
|
T(0x4f329646, SQSHRN2(V6.S4(), V18.D2(), 14))
|
||||||
|
T(0x7f188469, SQSHRUN(H9, S3, 8))
|
||||||
|
T(0x6f328478, SQSHRUN2(V24.S4(), V3.D2(), 14))
|
||||||
|
T(0x5e362dae, SQSUB(B14, B13, B22))
|
||||||
|
T(0x0e3c2c86, SQSUB(V6.B8(), V4.B8(), V28.B8()))
|
||||||
|
T(0x5ea149fc, SQXTN(S28, D15))
|
||||||
|
T(0x4e214b24, SQXTN2(V4.B16(), V25.H8()))
|
||||||
|
T(0x7e61290e, SQXTUN(H14, S8))
|
||||||
|
T(0x6ea12b96, SQXTUN2(V22.S4(), V28.D2()))
|
||||||
|
T(0x4eae1673, SRHADD(V19.S4(), V19.S4(), V14.S4()))
|
||||||
|
T(0x7f794647, SRI(D7, D18, 7))
|
||||||
|
T(0x6f654787, SRI(V7.D2(), V28.D2(), 27))
|
||||||
|
T(0x5ee0549e, SRSHL(D30, D4, D0))
|
||||||
|
T(0x4eba55d2, SRSHL(V18.S4(), V14.S4(), V26.S4()))
|
||||||
|
T(0x5f712744, SRSHR(D4, D26, 15))
|
||||||
|
T(0x4f2025f5, SRSHR(V21.S4(), V15.S4(), 32))
|
||||||
|
T(0x5f7734a9, SRSRA(D9, D5, 9))
|
||||||
|
T(0x0f3a371a, SRSRA(V26.S2(), V24.S2(), 6))
|
||||||
|
T(0x5eed44ee, SSHL(D14, D7, D13))
|
||||||
|
T(0x0e704683, SSHL(V3.H4(), V20.H4(), V16.H4()))
|
||||||
|
T(0x4f2aa7c3, SSHLL2(V3.D2(), V30.S4(), 10))
|
||||||
|
T(0x5f5e058d, SSHR(D13, D12, 34))
|
||||||
|
T(0x4f730496, SSHR(V22.D2(), V4.D2(), 13))
|
||||||
|
T(0x5f5e152a, SSRA(D10, D9, 34))
|
||||||
|
T(0x0f21172b, SSRA(V11.S2(), V25.S2(), 31))
|
||||||
|
T(0x4e24220f, SSUBL2(V15.H8(), V16.B16(), V4.B16()))
|
||||||
|
T(0x4e3f32a2, SSUBW2(V2.H8(), V21.H8(), V31.B16()))
|
||||||
|
T(0x0c007a62, ST1(List{V2.S2()}, X19))
|
||||||
|
T(0x4c00adb7, ST1(List{V23.D2(), V24.D2()}, X13))
|
||||||
|
T(0x0c006b92, ST1(List{V18.S2(), V19.S2(), V20.S2()}, X28))
|
||||||
|
T(0x4c0029b8, ST1(List{V24.S4(), V25.S4(), V26.S4(), V27.S4()}, X13))
|
||||||
|
T(0x0c9f7f60, ST1(List{V0.D1()}, X27, POST_INDEXED, 8))
|
||||||
|
T(0x0c9f7ebc, ST1(List{V28.D1()}, X21, POST_INDEXED, 8))
|
||||||
|
T(0x0c9faf06, ST1(List{V6.D1(), V7.D1()}, X24, POST_INDEXED, 16))
|
||||||
|
T(0x4c93aff5, ST1(List{V21.D2(), V22.D2()}, SP, POST_INDEXED, X19))
|
||||||
|
T(0x4c9f6398, ST1(List{V24.B16(), V25.B16(), V26.B16()}, X28, POST_INDEXED, 48))
|
||||||
|
T(0x4c8162ff, ST1(List{V31.B16(), V0.B16(), V1.B16()}, X23, POST_INDEXED, X1))
|
||||||
|
T(0x0c9f23ee, ST1(List{V14.B8(), V15.B8(), V16.B8(), V17.B8()}, SP, POST_INDEXED, 32))
|
||||||
|
T(0x4c862148, ST1(List{V8.B16(), V9.B16(), V10.B16(), V11.B16()}, X10, POST_INDEXED, X6))
|
||||||
|
T(0x0d001c7a, ST1(List{V26.B()}[7], X3))
|
||||||
|
T(0x0d005b54, ST1(List{V20.H()}[3], X26))
|
||||||
|
T(0x4d009392, ST1(List{V18.S()}[3], X28))
|
||||||
|
T(0x4d008509, ST1(List{V9.D()}[1], X8))
|
||||||
|
T(0x4d9f1246, ST1(List{V6.B()}[12], X18, POST_INDEXED, 1))
|
||||||
|
T(0x0d8c17f5, ST1(List{V21.B()}[5], SP, POST_INDEXED, X12))
|
||||||
|
T(0x4d9f53ee, ST1(List{V14.H()}[6], SP, POST_INDEXED, 2))
|
||||||
|
T(0x0d8f48c4, ST1(List{V4.H()}[1], X6, POST_INDEXED, X15))
|
||||||
|
T(0x4d9f8185, ST1(List{V5.S()}[2], X12, POST_INDEXED, 4))
|
||||||
|
T(0x0d8c92bc, ST1(List{V28.S()}[1], X21, POST_INDEXED, X12))
|
||||||
|
T(0x4d9f86b3, ST1(List{V19.D()}[1], X21, POST_INDEXED, 8))
|
||||||
|
T(0x4d9c8442, ST1(List{V2.D()}[1], X2, POST_INDEXED, X28))
|
||||||
|
T(0x4c008a69, ST2(List{V9.S4(), V10.S4()}, X19))
|
||||||
|
T(0x4c9f8930, ST2(List{V16.S4(), V17.S4()}, X9, POST_INDEXED, 32))
|
||||||
|
T(0x0c9a8993, ST2(List{V19.S2(), V20.S2()}, X12, POST_INDEXED, X26))
|
||||||
|
T(0x0d2001ac, ST2(List{V12.B(), V13.B()}[0], X13))
|
||||||
|
T(0x4d20495c, ST2(List{V28.H(), V29.H()}[5], X10))
|
||||||
|
T(0x4d2093e4, ST2(List{V4.S(), V5.S()}[3], SP))
|
||||||
|
T(0x4d208482, ST2(List{V2.D(), V3.D()}[1], X4))
|
||||||
|
T(0x4dbf0e40, ST2(List{V0.B(), V1.B()}[11], X18, POST_INDEXED, 2))
|
||||||
|
T(0x0db8085f, ST2(List{V31.B(), V0.B()}[2], X2, POST_INDEXED, X24))
|
||||||
|
T(0x0dbf4a2d, ST2(List{V13.H(), V14.H()}[1], X17, POST_INDEXED, 4))
|
||||||
|
T(0x4db1417e, ST2(List{V30.H(), V31.H()}[4], X11, POST_INDEXED, X17))
|
||||||
|
T(0x0dbf81af, ST2(List{V15.S(), V16.S()}[0], X13, POST_INDEXED, 8))
|
||||||
|
T(0x0dbf831c, ST2(List{V28.S(), V29.S()}[0], X24, POST_INDEXED, 8))
|
||||||
|
T(0x0dbf846a, ST2(List{V10.D(), V11.D()}[0], X3, POST_INDEXED, 16))
|
||||||
|
T(0x0dab85dc, ST2(List{V28.D(), V29.D()}[0], X14, POST_INDEXED, X11))
|
||||||
|
T(0x0c004a09, ST3(List{V9.S2(), V10.S2(), V11.S2()}, X16))
|
||||||
|
T(0x4c9f4768, ST3(List{V8.H8(), V9.H8(), V10.H8()}, X27, POST_INDEXED, 48))
|
||||||
|
T(0x0c944918, ST3(List{V24.S2(), V25.S2(), V26.S2()}, X8, POST_INDEXED, X20))
|
||||||
|
T(0x0d003f80, ST3(List{V0.B(), V1.B(), V2.B()}[7], X28))
|
||||||
|
T(0x0d007306, ST3(List{V6.H(), V7.H(), V8.H()}[2], X24))
|
||||||
|
T(0x0d00b131, ST3(List{V17.S(), V18.S(), V19.S()}[1], X9))
|
||||||
|
T(0x4d00a5f8, ST3(List{V24.D(), V25.D(), V26.D()}[1], X15))
|
||||||
|
T(0x0d9f27c1, ST3(List{V1.B(), V2.B(), V3.B()}[1], X30, POST_INDEXED, 3))
|
||||||
|
T(0x4d992bb2, ST3(List{V18.B(), V19.B(), V20.B()}[10], X29, POST_INDEXED, X25))
|
||||||
|
T(0x0d9f785d, ST3(List{V29.H(), V30.H(), V31.H()}[3], X2, POST_INDEXED, 6))
|
||||||
|
T(0x4d8b726b, ST3(List{V11.H(), V12.H(), V13.H()}[6], X19, POST_INDEXED, X11))
|
||||||
|
T(0x4d9fa342, ST3(List{V2.S(), V3.S(), V4.S()}[2], X26, POST_INDEXED, 12))
|
||||||
|
T(0x4d80b206, ST3(List{V6.S(), V7.S(), V8.S()}[3], X16, POST_INDEXED, X0))
|
||||||
|
T(0x4d9fa5de, ST3(List{V30.D(), V31.D(), V0.D()}[1], X14, POST_INDEXED, 24))
|
||||||
|
T(0x4d8ba6d7, ST3(List{V23.D(), V24.D(), V25.D()}[1], X22, POST_INDEXED, X11))
|
||||||
|
T(0x0c00034f, ST4(List{V15.B8(), V16.B8(), V17.B8(), V18.B8()}, X26))
|
||||||
|
T(0x4c9f038c, ST4(List{V12.B16(), V13.B16(), V14.B16(), V15.B16()}, X28, POST_INDEXED, 64))
|
||||||
|
T(0x4c800719, ST4(List{V25.H8(), V26.H8(), V27.H8(), V28.H8()}, X24, POST_INDEXED, X0))
|
||||||
|
T(0x0d2021a8, ST4(List{V8.B(), V9.B(), V10.B(), V11.B()}[0], X13))
|
||||||
|
T(0x4d2062cd, ST4(List{V13.H(), V14.H(), V15.H(), V16.H()}[4], X22))
|
||||||
|
T(0x0d20b146, ST4(List{V6.S(), V7.S(), V8.S(), V9.S()}[1], X10))
|
||||||
|
T(0x4d20a6f5, ST4(List{V21.D(), V22.D(), V23.D(), V24.D()}[1], X23))
|
||||||
|
T(0x0dbf2d56, ST4(List{V22.B(), V23.B(), V24.B(), V25.B()}[3], X10, POST_INDEXED, 4))
|
||||||
|
T(0x4da631df, ST4(List{V31.B(), V0.B(), V1.B(), V2.B()}[12], X14, POST_INDEXED, X6))
|
||||||
|
T(0x0dbf7a76, ST4(List{V22.H(), V23.H(), V24.H(), V25.H()}[3], X19, POST_INDEXED, 8))
|
||||||
|
T(0x0dbb698e, ST4(List{V14.H(), V15.H(), V16.H(), V17.H()}[1], X12, POST_INDEXED, X27))
|
||||||
|
T(0x4dbfb37f, ST4(List{V31.S(), V0.S(), V1.S(), V2.S()}[3], X27, POST_INDEXED, 16))
|
||||||
|
T(0x4dadb3d1, ST4(List{V17.S(), V18.S(), V19.S(), V20.S()}[3], X30, POST_INDEXED, X13))
|
||||||
|
T(0x4dbfa5b3, ST4(List{V19.D(), V20.D(), V21.D(), V22.D()}[1], X13, POST_INDEXED, 32))
|
||||||
|
T(0x4db5a7cf, ST4(List{V15.D(), V16.D(), V17.D(), V18.D()}[1], X30, POST_INDEXED, X21))
|
||||||
|
T(0x2c29149a, STNP(S26, S5, X4, -184))
|
||||||
|
T(0x6c229316, STNP(D22, D4, X24, -472))
|
||||||
|
T(0xac3bc3c8, STNP(Q8, Q16, X30, -144))
|
||||||
|
T(0x2cacdf66, STP(S6, S23, X27, POST_INDEXED, -156))
|
||||||
|
T(0x6c826f4f, STP(D15, D27, X26, POST_INDEXED, 32))
|
||||||
|
T(0xac97955a, STP(Q26, Q5, X10, POST_INDEXED, 752))
|
||||||
|
T(0x2da7ba37, STP(S23, S14, X17, PRE_INDEXED, -196))
|
||||||
|
T(0x6d8bcbce, STP(D14, D18, X30, PRE_INDEXED, 184))
|
||||||
|
T(0xad8b4ba6, STP(Q6, Q18, X29, PRE_INDEXED, 352))
|
||||||
|
T(0x2d1f7434, STP(S20, S29, X1, 248))
|
||||||
|
T(0x6d3bb5d8, STP(D24, D13, X14, -72))
|
||||||
|
T(0xad09088a, STP(Q10, Q2, X4, 288))
|
||||||
|
T(0x3c066467, STR(B7, X3, POST_INDEXED, 102))
|
||||||
|
T(0x7c070723, STR(H3, X25, POST_INDEXED, 112))
|
||||||
|
T(0xbc13175a, STR(S26, X26, POST_INDEXED, -207))
|
||||||
|
T(0xfc1be536, STR(D22, X9, POST_INDEXED, -66))
|
||||||
|
T(0x3c99b56b, STR(Q11, X11, POST_INDEXED, -101))
|
||||||
|
T(0x3c002d49, STR(B9, X10, PRE_INDEXED, 2))
|
||||||
|
T(0x7c158e09, STR(H9, X16, PRE_INDEXED, -168))
|
||||||
|
T(0xbc06bc8d, STR(S13, X4, PRE_INDEXED, 107))
|
||||||
|
T(0xfc080eae, STR(D14, X21, PRE_INDEXED, 128))
|
||||||
|
T(0x3c8e7ed9, STR(Q25, X22, PRE_INDEXED, 231))
|
||||||
|
T(0x3d275492, STR(B18, X4, 2517))
|
||||||
|
T(0x7d0b4265, STR(H5, X19, 1440))
|
||||||
|
T(0xbd0d2595, STR(S21, X12, 3364))
|
||||||
|
T(0xfd237a73, STR(D19, X19, 18160))
|
||||||
|
T(0x3db4a5f5, STR(Q21, X15, 53904))
|
||||||
|
T(0x3c3e693c, STR(B28, X9, X30, LSL, 0))
|
||||||
|
T(0x3c3b6ac5, STR(B5, X22, X27, LSL, 0))
|
||||||
|
T(0x7c36faf0, STR(H16, X23, X22, SXTX, 1))
|
||||||
|
T(0xbc27f838, STR(S24, X1, X7, SXTX, 2))
|
||||||
|
T(0xfc29db51, STR(D17, X26, W9, SXTW, 3))
|
||||||
|
T(0x3cbfea8f, STR(Q15, X20, XZR, SXTX))
|
||||||
|
T(0x3c0441c8, STUR(B8, X14, 68))
|
||||||
|
T(0x7c00b0d7, STUR(H23, X6, 11))
|
||||||
|
T(0xbc0d117d, STUR(S29, X11, 209))
|
||||||
|
T(0xfc1f03c0, STUR(D0, X30, -16))
|
||||||
|
T(0x3c9753f0, STUR(Q16, SP, -139))
|
||||||
|
T(0x7eeb84f9, SUB(D25, D7, D11))
|
||||||
|
T(0x6e708714, SUB(V20.H8(), V24.H8(), V16.H8()))
|
||||||
|
T(0x4e766323, SUBHN2(V3.H8(), V25.S4(), V22.S4()))
|
||||||
|
T(0x5e203935, SUQADD(B21, B9))
|
||||||
|
T(0x4e203b33, SUQADD(V19.B16(), V25.B16()))
|
||||||
|
// SXTL
|
||||||
|
T(0x0e0c20db, TBL(V27.B8(), List{V6.B16(), V7.B16()}, V12.B8()))
|
||||||
|
T(0x4e1d43ab, TBL(V11.B16(), List{V29.B16(), V30.B16(), V31.B16()}, V29.B16()))
|
||||||
|
T(0x0e07634f, TBL(V15.B8(), List{V26.B16(), V27.B16(), V28.B16(), V29.B16()}, V7.B8()))
|
||||||
|
T(0x0e0603b9, TBL(V25.B8(), List{V29.B16()}, V6.B8()))
|
||||||
|
T(0x0e05317a, TBX(V26.B8(), List{V11.B16(), V12.B16()}, V5.B8()))
|
||||||
|
T(0x4e0150ca, TBX(V10.B16(), List{V6.B16(), V7.B16(), V8.B16()}, V1.B16()))
|
||||||
|
T(0x4e0e7190, TBX(V16.B16(), List{V12.B16(), V13.B16(), V14.B16(), V15.B16()}, V14.B16()))
|
||||||
|
T(0x4e1b1333, TBX(V19.B16(), List{V25.B16()}, V27.B16()))
|
||||||
|
T(0x4e0829e3, TRN1(V3.B16(), V15.B16(), V8.B16()))
|
||||||
|
T(0x4ecc6b24, TRN2(V4.D2(), V25.D2(), V12.D2()))
|
||||||
|
T(0x2e697f5d, UABA(V29.H4(), V26.H4(), V9.H4()))
|
||||||
|
T(0x2e36519e, UABAL(V30.H8(), V12.B8(), V22.B8()))
|
||||||
|
T(0x6e6975e0, UABD(V0.H8(), V15.H8(), V9.H8()))
|
||||||
|
T(0x2e2e718a, UABDL(V10.H8(), V12.B8(), V14.B8()))
|
||||||
|
T(0x6ea069b1, UADALP(V17.D2(), V13.S4()))
|
||||||
|
T(0x2e6d0349, UADDL(V9.S4(), V26.H4(), V13.H4()))
|
||||||
|
T(0x6e602bfc, UADDLP(V28.S4(), V31.H8()))
|
||||||
|
T(0x6e703b6d, UADDLV(S13, V27.H8()))
|
||||||
|
T(0x2e781352, UADDW(V18.S4(), V26.S4(), V24.H4()))
|
||||||
|
T(0x1e03ec95, UCVTF(S21, W4, 5))
|
||||||
|
T(0x1e43fd36, UCVTF(D22, W9, 1))
|
||||||
|
T(0x9e03a27b, UCVTF(S27, X19, 24))
|
||||||
|
T(0x9e43e9c4, UCVTF(D4, X14, 6))
|
||||||
|
T(0x1e230096, UCVTF(S22, W4))
|
||||||
|
T(0x1e630076, UCVTF(D22, W3))
|
||||||
|
T(0x9e2302c8, UCVTF(S8, X22))
|
||||||
|
T(0x9e6302cd, UCVTF(D13, X22))
|
||||||
|
T(0x7f2ce5a2, UCVTF(S2, S13, 20))
|
||||||
|
T(0x6f4be788, UCVTF(V8.D2(), V28.D2(), 53))
|
||||||
|
T(0x7e21d87f, UCVTF(S31, S3))
|
||||||
|
T(0x2e21da7d, UCVTF(V29.S2(), V19.S2()))
|
||||||
|
T(0x2e7b0674, UHADD(V20.H4(), V19.H4(), V27.H4()))
|
||||||
|
T(0x6ea9277f, UHSUB(V31.S4(), V27.S4(), V9.S4()))
|
||||||
|
T(0x6e7a6658, UMAX(V24.H8(), V18.H8(), V26.H8()))
|
||||||
|
T(0x2e23a513, UMAXP(V19.B8(), V8.B8(), V3.B8()))
|
||||||
|
T(0x2e70a9b5, UMAXV(H21, V13.H4()))
|
||||||
|
T(0x6e7d6ef2, UMIN(V18.H8(), V23.H8(), V29.H8()))
|
||||||
|
T(0x2e6eae4e, UMINP(V14.H4(), V18.H4(), V14.H4()))
|
||||||
|
T(0x2e71abe6, UMINV(H6, V31.H4()))
|
||||||
|
T(0x6fb820fa, UMLAL2(V26.D2(), V7.S4(), V24.S()[1]))
|
||||||
|
T(0x6ebc83ab, UMLAL2(V11.D2(), V29.S4(), V28.S4()))
|
||||||
|
T(0x2f5c61cf, UMLSL(V15.S4(), V14.H4(), V12.H()[1]))
|
||||||
|
T(0x6e6aa2e2, UMLSL2(V2.S4(), V23.H8(), V10.H8()))
|
||||||
|
T(0x0e0f3fb8, UMOV(W24, V29.B()[7]))
|
||||||
|
// UMOV
|
||||||
|
T(0x6f62a05c, UMULL2(V28.S4(), V2.H8(), V2.H()[2]))
|
||||||
|
T(0x6e6cc3b0, UMULL2(V16.S4(), V29.H8(), V12.H8()))
|
||||||
|
T(0x7ea40f68, UQADD(S8, S27, S4))
|
||||||
|
T(0x6eac0e8f, UQADD(V15.S4(), V20.S4(), V12.S4()))
|
||||||
|
T(0x7e2a5df5, UQRSHL(B21, B15, B10))
|
||||||
|
T(0x6ef55fc9, UQRSHL(V9.D2(), V30.D2(), V21.D2()))
|
||||||
|
T(0x7f0b9db4, UQRSHRN(B20, H13, 5))
|
||||||
|
T(0x2f159d7d, UQRSHRN(V29.H4(), V11.S4(), 11))
|
||||||
|
T(0x7f6c755c, UQSHL(D28, D10, 44))
|
||||||
|
T(0x6f6175ec, UQSHL(V12.D2(), V15.D2(), 33))
|
||||||
|
T(0x7eef4ff4, UQSHL(D20, D31, D15))
|
||||||
|
T(0x6e3d4f2e, UQSHL(V14.B16(), V25.B16(), V29.B16()))
|
||||||
|
T(0x7f1f94d2, UQSHRN(H18, S6, 1))
|
||||||
|
T(0x6f3397e4, UQSHRN2(V4.S4(), V31.D2(), 13))
|
||||||
|
T(0x7ee12cad, UQSUB(D13, D5, D1))
|
||||||
|
T(0x2e712ff3, UQSUB(V19.H4(), V31.H4(), V17.H4()))
|
||||||
|
T(0x7e614b06, UQXTN(H6, S24))
|
||||||
|
T(0x6e2149ec, UQXTN2(V12.B16(), V15.H8()))
|
||||||
|
T(0x0ea1c849, URECPE(V9.S2(), V2.S2()))
|
||||||
|
T(0x6eb51740, URHADD(V0.S4(), V26.S4(), V21.S4()))
|
||||||
|
T(0x7eeb57f8, URSHL(D24, D31, D11))
|
||||||
|
T(0x6e335531, URSHL(V17.B16(), V9.B16(), V19.B16()))
|
||||||
|
T(0x7f65253d, URSHR(D29, D9, 27))
|
||||||
|
T(0x2f102566, URSHR(V6.H4(), V11.H4(), 16))
|
||||||
|
T(0x2ea1cb59, URSQRTE(V25.S2(), V26.S2()))
|
||||||
|
T(0x7f54345f, URSRA(D31, D2, 44))
|
||||||
|
T(0x2f1b345f, URSRA(V31.H4(), V2.H4(), 5))
|
||||||
|
T(0x7ef94448, USHL(D8, D2, D25))
|
||||||
|
T(0x6ea14621, USHL(V1.S4(), V17.S4(), V1.S4()))
|
||||||
|
T(0x2f33a5a1, USHLL(V1.D2(), V13.S2(), 19))
|
||||||
|
T(0x7f5405d0, USHR(D16, D14, 44))
|
||||||
|
T(0x6f450505, USHR(V5.D2(), V8.D2(), 59))
|
||||||
|
T(0x7ea038c1, USQADD(S1, S6))
|
||||||
|
T(0x2e203b60, USQADD(V0.B8(), V27.B8()))
|
||||||
|
T(0x7f4616d2, USRA(D18, D22, 58))
|
||||||
|
T(0x2f1a1713, USRA(V19.H4(), V24.H4(), 6))
|
||||||
|
T(0x2e3f226e, USUBL(V14.H8(), V19.B8(), V31.B8()))
|
||||||
|
T(0x6e7a33a0, USUBW2(V0.S4(), V29.S4(), V26.H8()))
|
||||||
|
// UXTL
|
||||||
|
T(0x4e1b1a1f, UZP1(V31.B16(), V16.B16(), V27.B16()))
|
||||||
|
T(0x4ecc597b, UZP2(V27.D2(), V11.D2(), V12.D2()))
|
||||||
|
T(0x0e212af7, XTN(V23.B8(), V23.H8()))
|
||||||
|
T(0x4e853928, ZIP1(V8.S4(), V9.S4(), V5.S4()))
|
||||||
|
T(0x0e977a78, ZIP2(V24.S2(), V19.S2(), V23.S2()))
|
1079
externals/dynarmic/externals/oaknut/tests/general.cpp
vendored
Executable file
1079
externals/dynarmic/externals/oaknut/tests/general.cpp
vendored
Executable file
File diff suppressed because it is too large
Load diff
20
externals/dynarmic/externals/oaknut/tests/rand_int.hpp
vendored
Executable file
20
externals/dynarmic/externals/oaknut/tests/rand_int.hpp
vendored
Executable file
|
@ -0,0 +1,20 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <random>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
T RandInt(T min, T max)
|
||||||
|
{
|
||||||
|
static_assert(std::is_integral_v<T>, "T must be an integral type.");
|
||||||
|
static_assert(!std::is_same_v<T, signed char> && !std::is_same_v<T, unsigned char>,
|
||||||
|
"Using char with uniform_int_distribution is undefined behavior.");
|
||||||
|
|
||||||
|
static std::random_device rd;
|
||||||
|
static std::mt19937 mt(rd());
|
||||||
|
std::uniform_int_distribution<T> rand(min, max);
|
||||||
|
return rand(mt);
|
||||||
|
}
|
|
@ -235,8 +235,9 @@ void A32EmitX64::GenTerminalHandlers() {
|
||||||
calculate_location_descriptor();
|
calculate_location_descriptor();
|
||||||
code.L(rsb_cache_miss);
|
code.L(rsb_cache_miss);
|
||||||
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
||||||
|
code.mov(rbp, rbx);
|
||||||
if (code.HasHostFeature(HostFeature::SSE42)) {
|
if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||||
code.crc32(ebp, r12d);
|
code.crc32(rbp, r12);
|
||||||
}
|
}
|
||||||
code.and_(ebp, fast_dispatch_table_mask);
|
code.and_(ebp, fast_dispatch_table_mask);
|
||||||
code.lea(rbp, ptr[r12 + rbp]);
|
code.lea(rbp, ptr[r12 + rbp]);
|
||||||
|
@ -254,11 +255,12 @@ void A32EmitX64::GenTerminalHandlers() {
|
||||||
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
|
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
|
||||||
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
||||||
if (code.HasHostFeature(HostFeature::SSE42)) {
|
if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||||
code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32());
|
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
|
||||||
}
|
}
|
||||||
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
|
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
|
||||||
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
|
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
|
||||||
code.ret();
|
code.ret();
|
||||||
|
PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a32_fast_dispatch_table_lookup");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -101,6 +101,10 @@ struct Jit::Impl {
|
||||||
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
|
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ClearHalt(HaltReason hr) {
|
||||||
|
Atomic::And(&jit_state.halt_reason, ~static_cast<u32>(hr));
|
||||||
|
}
|
||||||
|
|
||||||
void ClearExclusiveState() {
|
void ClearExclusiveState() {
|
||||||
jit_state.exclusive_state = 0;
|
jit_state.exclusive_state = 0;
|
||||||
}
|
}
|
||||||
|
@ -232,6 +236,10 @@ void Jit::HaltExecution(HaltReason hr) {
|
||||||
impl->HaltExecution(hr);
|
impl->HaltExecution(hr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Jit::ClearHalt(HaltReason hr) {
|
||||||
|
impl->ClearHalt(hr);
|
||||||
|
}
|
||||||
|
|
||||||
void Jit::ClearExclusiveState() {
|
void Jit::ClearExclusiveState() {
|
||||||
impl->ClearExclusiveState();
|
impl->ClearExclusiveState();
|
||||||
}
|
}
|
||||||
|
|
|
@ -193,8 +193,9 @@ void A64EmitX64::GenTerminalHandlers() {
|
||||||
calculate_location_descriptor();
|
calculate_location_descriptor();
|
||||||
code.L(rsb_cache_miss);
|
code.L(rsb_cache_miss);
|
||||||
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
||||||
|
code.mov(rbp, rbx);
|
||||||
if (code.HasHostFeature(HostFeature::SSE42)) {
|
if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||||
code.crc32(rbx, r12d);
|
code.crc32(rbp, r12);
|
||||||
}
|
}
|
||||||
code.and_(ebp, fast_dispatch_table_mask);
|
code.and_(ebp, fast_dispatch_table_mask);
|
||||||
code.lea(rbp, ptr[r12 + rbp]);
|
code.lea(rbp, ptr[r12 + rbp]);
|
||||||
|
@ -215,7 +216,7 @@ void A64EmitX64::GenTerminalHandlers() {
|
||||||
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
|
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
|
||||||
}
|
}
|
||||||
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
|
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
|
||||||
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
|
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM2 + code.ABI_PARAM1]);
|
||||||
code.ret();
|
code.ret();
|
||||||
PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a64_fast_dispatch_table_lookup");
|
PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a64_fast_dispatch_table_lookup");
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,6 +134,10 @@ public:
|
||||||
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
|
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ClearHalt(HaltReason hr) {
|
||||||
|
Atomic::And(&jit_state.halt_reason, ~static_cast<u32>(hr));
|
||||||
|
}
|
||||||
|
|
||||||
u64 GetSP() const {
|
u64 GetSP() const {
|
||||||
return jit_state.sp;
|
return jit_state.sp;
|
||||||
}
|
}
|
||||||
|
@ -351,6 +355,10 @@ void Jit::HaltExecution(HaltReason hr) {
|
||||||
impl->HaltExecution(hr);
|
impl->HaltExecution(hr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Jit::ClearHalt(HaltReason hr) {
|
||||||
|
impl->ClearHalt(hr);
|
||||||
|
}
|
||||||
|
|
||||||
u64 Jit::GetSP() const {
|
u64 Jit::GetSP() const {
|
||||||
return impl->GetSP();
|
return impl->GetSP();
|
||||||
}
|
}
|
||||||
|
|
|
@ -402,7 +402,7 @@ void BlockOfCode::LookupBlock() {
|
||||||
cb.LookupBlock->EmitCall(*this);
|
cb.LookupBlock->EmitCall(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
Xbyak::Address BlockOfCode::XmmConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
||||||
return constant_pool.GetConstant(frame, lower, upper);
|
return constant_pool.GetConstant(frame, lower, upper);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "dynarmic/backend/x64/jitstate_info.h"
|
#include "dynarmic/backend/x64/jitstate_info.h"
|
||||||
#include "dynarmic/common/cast_util.h"
|
#include "dynarmic/common/cast_util.h"
|
||||||
#include "dynarmic/interface/halt_reason.h"
|
#include "dynarmic/interface/halt_reason.h"
|
||||||
|
#include "mcl/bit/bit_field.hpp"
|
||||||
|
|
||||||
namespace Dynarmic::Backend::X64 {
|
namespace Dynarmic::Backend::X64 {
|
||||||
|
|
||||||
|
@ -116,7 +117,13 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Xbyak::Address MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
|
Xbyak::Address XmmConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper);
|
||||||
|
|
||||||
|
template<size_t esize>
|
||||||
|
Xbyak::Address XmmBConst(const Xbyak::AddressFrame& frame, u64 value) {
|
||||||
|
return XmmConst(frame, mcl::bit::replicate_element<u64>(esize, value),
|
||||||
|
mcl::bit::replicate_element<u64>(esize, value));
|
||||||
|
}
|
||||||
|
|
||||||
/// Far code sits far away from the near code. Execution remains primarily in near code.
|
/// Far code sits far away from the near code. Execution remains primarily in near code.
|
||||||
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
|
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
|
||||||
|
|
|
@ -14,22 +14,22 @@
|
||||||
namespace Dynarmic::Backend::X64 {
|
namespace Dynarmic::Backend::X64 {
|
||||||
|
|
||||||
ConstantPool::ConstantPool(BlockOfCode& code, size_t size)
|
ConstantPool::ConstantPool(BlockOfCode& code, size_t size)
|
||||||
: code(code), pool_size(size) {
|
: code(code), insertion_point(0) {
|
||||||
code.int3();
|
code.int3();
|
||||||
code.align(align_size);
|
code.align(align_size);
|
||||||
pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size));
|
pool = std::span<ConstantT>(
|
||||||
current_pool_ptr = pool_begin;
|
reinterpret_cast<ConstantT*>(code.AllocateFromCodeSpace(size)), size / align_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
||||||
const auto constant = std::make_pair(lower, upper);
|
const auto constant = ConstantT(lower, upper);
|
||||||
auto iter = constant_info.find(constant);
|
auto iter = constant_info.find(constant);
|
||||||
if (iter == constant_info.end()) {
|
if (iter == constant_info.end()) {
|
||||||
ASSERT(static_cast<size_t>(current_pool_ptr - pool_begin) < pool_size);
|
ASSERT(insertion_point < pool.size());
|
||||||
std::memcpy(current_pool_ptr, &lower, sizeof(u64));
|
ConstantT& target_constant = pool[insertion_point];
|
||||||
std::memcpy(current_pool_ptr + sizeof(u64), &upper, sizeof(u64));
|
target_constant = constant;
|
||||||
iter = constant_info.emplace(constant, current_pool_ptr).first;
|
iter = constant_info.emplace(constant, &target_constant).first;
|
||||||
current_pool_ptr += align_size;
|
++insertion_point;
|
||||||
}
|
}
|
||||||
return frame[code.rip + iter->second];
|
return frame[code.rip + iter->second];
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <bit>
|
#include <bit>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <span>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include <mcl/stdint.hpp>
|
#include <mcl/stdint.hpp>
|
||||||
|
@ -29,18 +31,20 @@ public:
|
||||||
private:
|
private:
|
||||||
static constexpr size_t align_size = 16; // bytes
|
static constexpr size_t align_size = 16; // bytes
|
||||||
|
|
||||||
|
using ConstantT = std::pair<u64, u64>;
|
||||||
|
static_assert(sizeof(ConstantT) == align_size);
|
||||||
|
|
||||||
struct ConstantHash {
|
struct ConstantHash {
|
||||||
std::size_t operator()(const std::pair<u64, u64>& constant) const noexcept {
|
std::size_t operator()(const ConstantT& constant) const noexcept {
|
||||||
return constant.first ^ std::rotl<u64>(constant.second, 1);
|
return constant.first ^ std::rotl<u64>(constant.second, 1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
tsl::robin_map<std::pair<u64, u64>, void*, ConstantHash> constant_info;
|
tsl::robin_map<ConstantT, void*, ConstantHash> constant_info;
|
||||||
|
|
||||||
BlockOfCode& code;
|
BlockOfCode& code;
|
||||||
size_t pool_size;
|
std::span<ConstantT> pool;
|
||||||
u8* pool_begin;
|
std::size_t insertion_point;
|
||||||
u8* current_pool_ptr;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Dynarmic::Backend::X64
|
} // namespace Dynarmic::Backend::X64
|
||||||
|
|
|
@ -325,8 +325,10 @@ void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_co
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::Unpatch(const IR::LocationDescriptor& target_desc) {
|
void EmitX64::Unpatch(const IR::LocationDescriptor& target_desc) {
|
||||||
|
if (patch_information.count(target_desc)) {
|
||||||
Patch(target_desc, nullptr);
|
Patch(target_desc, nullptr);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void EmitX64::ClearCache() {
|
void EmitX64::ClearCache() {
|
||||||
block_descriptors.clear();
|
block_descriptors.clear();
|
||||||
|
@ -345,9 +347,8 @@ void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (patch_information.count(descriptor)) {
|
|
||||||
Unpatch(descriptor);
|
Unpatch(descriptor);
|
||||||
}
|
|
||||||
block_descriptors.erase(it);
|
block_descriptors.erase(it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
|
||||||
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
|
code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
|
||||||
|
|
||||||
code.movzx(value.cvt32(), value.changeBit(data_size));
|
code.movzx(value.cvt32(), value.changeBit(data_size));
|
||||||
code.xor_(value.cvt32(), crc);
|
code.xor_(value.cvt32(), crc);
|
||||||
|
@ -72,7 +72,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
|
||||||
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
|
code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
|
||||||
|
|
||||||
code.xor_(crc, value);
|
code.xor_(crc, value);
|
||||||
code.shl(crc.cvt64(), 32);
|
code.shl(crc.cvt64(), 32);
|
||||||
|
@ -93,7 +93,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
|
||||||
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
|
code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
|
||||||
|
|
||||||
code.mov(crc, crc);
|
code.mov(crc, crc);
|
||||||
code.xor_(crc.cvt64(), value);
|
code.xor_(crc.cvt64(), value);
|
||||||
|
|
|
@ -90,10 +90,9 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
|
||||||
FpFixup::Norm_Src,
|
FpFixup::Norm_Src,
|
||||||
FpFixup::Norm_Src,
|
FpFixup::Norm_Src,
|
||||||
FpFixup::Norm_Src);
|
FpFixup::Norm_Src);
|
||||||
constexpr u64 denormal_to_zero64 = mcl::bit::replicate_element<fsize, u64>(denormal_to_zero);
|
|
||||||
|
|
||||||
const Xbyak::Xmm tmp = xmm16;
|
const Xbyak::Xmm tmp = xmm16;
|
||||||
FCODE(vmovap)(tmp, code.MConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64)));
|
FCODE(vmovap)(tmp, code.XmmBConst<fsize>(xword, denormal_to_zero));
|
||||||
|
|
||||||
for (const Xbyak::Xmm& xmm : to_daz) {
|
for (const Xbyak::Xmm& xmm : to_daz) {
|
||||||
FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
|
FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
|
||||||
|
@ -102,17 +101,17 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const Xbyak::Xmm& xmm : to_daz) {
|
for (const Xbyak::Xmm& xmm : to_daz) {
|
||||||
code.movaps(xmm0, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
|
code.movaps(xmm0, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
|
||||||
code.andps(xmm0, xmm);
|
code.andps(xmm0, xmm);
|
||||||
if constexpr (fsize == 32) {
|
if constexpr (fsize == 32) {
|
||||||
code.pcmpgtd(xmm0, code.MConst(xword, f32_smallest_normal - 1));
|
code.pcmpgtd(xmm0, code.XmmBConst<32>(xword, f32_smallest_normal - 1));
|
||||||
} else if (code.HasHostFeature(HostFeature::SSE42)) {
|
} else if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||||
code.pcmpgtq(xmm0, code.MConst(xword, f64_smallest_normal - 1));
|
code.pcmpgtq(xmm0, code.XmmBConst<64>(xword, f64_smallest_normal - 1));
|
||||||
} else {
|
} else {
|
||||||
code.pcmpgtd(xmm0, code.MConst(xword, f64_smallest_normal - 1));
|
code.pcmpgtd(xmm0, code.XmmBConst<64>(xword, f64_smallest_normal - 1));
|
||||||
code.pshufd(xmm0, xmm0, 0b11100101);
|
code.pshufd(xmm0, xmm0, 0b11100101);
|
||||||
}
|
}
|
||||||
code.orps(xmm0, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero));
|
code.orps(xmm0, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero));
|
||||||
code.andps(xmm, xmm0);
|
code.andps(xmm, xmm0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -123,7 +122,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch)
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||||
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
||||||
FpFixup::PosZero);
|
FpFixup::PosZero);
|
||||||
FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0));
|
FCODE(vfixupimms)(xmm_value, xmm_value, code.XmmBConst<32>(ptr, nan_to_zero), u8(0));
|
||||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value);
|
FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value);
|
||||||
FCODE(vandp)(xmm_value, xmm_value, xmm_scratch);
|
FCODE(vandp)(xmm_value, xmm_value, xmm_scratch);
|
||||||
|
@ -139,15 +138,15 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) {
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||||
const Xbyak::Opmask nan_mask = k1;
|
const Xbyak::Opmask nan_mask = k1;
|
||||||
FCODE(vfpclasss)(nan_mask, result, u8(FpClass::QNaN | FpClass::SNaN));
|
FCODE(vfpclasss)(nan_mask, result, u8(FpClass::QNaN | FpClass::SNaN));
|
||||||
FCODE(vblendmp)(result | nan_mask, result, code.MConst(ptr_b, fsize == 32 ? f32_nan : f64_nan));
|
FCODE(vblendmp)(result | nan_mask, result, code.XmmBConst<fsize>(ptr_b, fsize == 32 ? f32_nan : f64_nan));
|
||||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
FCODE(vcmpunords)(xmm0, result, result);
|
FCODE(vcmpunords)(xmm0, result, result);
|
||||||
FCODE(blendvp)(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan));
|
FCODE(blendvp)(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
|
||||||
} else {
|
} else {
|
||||||
Xbyak::Label end;
|
Xbyak::Label end;
|
||||||
FCODE(ucomis)(result, result);
|
FCODE(ucomis)(result, result);
|
||||||
code.jnp(end);
|
code.jnp(end);
|
||||||
code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan));
|
code.movaps(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
|
||||||
code.L(end);
|
code.L(end);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -161,7 +160,7 @@ Xbyak::Label ProcessNaN(BlockOfCode& code, Xbyak::Xmm a) {
|
||||||
code.SwitchToFarCode();
|
code.SwitchToFarCode();
|
||||||
code.L(nan);
|
code.L(nan);
|
||||||
|
|
||||||
code.orps(a, code.MConst(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000));
|
code.orps(a, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000));
|
||||||
|
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
code.SwitchToNearCode();
|
code.SwitchToNearCode();
|
||||||
|
@ -257,10 +256,10 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
|
||||||
|
|
||||||
// Silence the SNaN as required by spec.
|
// Silence the SNaN as required by spec.
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vorps(result, op2, code.MConst(xword, mantissa_msb));
|
code.vorps(result, op2, code.XmmBConst<fsize>(xword, mantissa_msb));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(result, op2);
|
code.movaps(result, op2);
|
||||||
code.orps(result, code.MConst(xword, mantissa_msb));
|
code.orps(result, code.XmmBConst<fsize>(xword, mantissa_msb));
|
||||||
}
|
}
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
}
|
}
|
||||||
|
@ -339,7 +338,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
|
||||||
FCODE(ucomis)(op1, op2);
|
FCODE(ucomis)(op1, op2);
|
||||||
code.jp(op_are_nans);
|
code.jp(op_are_nans);
|
||||||
// Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN!
|
// Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN!
|
||||||
code.movaps(result, code.MConst(xword, FP::FPInfo<FPT>::DefaultNaN()));
|
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
code.L(op_are_nans);
|
code.L(op_are_nans);
|
||||||
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
|
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
|
||||||
|
@ -357,7 +356,7 @@ void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Address mask = code.MConst(xword, non_sign_mask);
|
const Xbyak::Address mask = code.XmmBConst<fsize>(xword, non_sign_mask);
|
||||||
|
|
||||||
code.andps(result, mask);
|
code.andps(result, mask);
|
||||||
|
|
||||||
|
@ -383,7 +382,7 @@ void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Address mask = code.MConst(xword, u64(sign_mask));
|
const Xbyak::Address mask = code.XmmBConst<fsize>(xword, u64(sign_mask));
|
||||||
|
|
||||||
code.xorps(result, mask);
|
code.xorps(result, mask);
|
||||||
|
|
||||||
|
@ -453,7 +452,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
code.L(nan);
|
code.L(nan);
|
||||||
if (ctx.FPCR().DN()) {
|
if (ctx.FPCR().DN()) {
|
||||||
code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan));
|
code.movaps(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
|
||||||
code.jmp(end);
|
code.jmp(end);
|
||||||
} else {
|
} else {
|
||||||
code.movaps(tmp, result);
|
code.movaps(tmp, result);
|
||||||
|
@ -487,7 +486,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
|
|
||||||
if (ctx.FPCR().DN()) {
|
if (ctx.FPCR().DN()) {
|
||||||
FCODE(vcmps)(k1, op2, op2, Cmp::Unordered_Q);
|
FCODE(vcmps)(k1, op2, op2, Cmp::Unordered_Q);
|
||||||
FCODE(vmovs)(op2 | k1, code.MConst(xword, default_nan));
|
FCODE(vmovs)(op2 | k1, code.XmmBConst<fsize>(xword, default_nan));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
|
Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
|
||||||
|
@ -542,12 +541,12 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
code.jc(maybe_both_nan);
|
code.jc(maybe_both_nan);
|
||||||
if (ctx.FPCR().DN()) {
|
if (ctx.FPCR().DN()) {
|
||||||
code.L(snan);
|
code.L(snan);
|
||||||
code.movaps(op2, code.MConst(xword, default_nan));
|
code.movaps(op2, code.XmmBConst<fsize>(xword, default_nan));
|
||||||
code.jmp(end);
|
code.jmp(end);
|
||||||
} else {
|
} else {
|
||||||
code.movaps(op2, op1);
|
code.movaps(op2, op1);
|
||||||
code.L(snan);
|
code.L(snan);
|
||||||
code.orps(op2, code.MConst(xword, FP::FPInfo<FPT>::mantissa_msb));
|
code.orps(op2, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::mantissa_msb));
|
||||||
code.jmp(end);
|
code.jmp(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -648,9 +647,9 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.movaps(result, operand1);
|
code.movaps(result, operand1);
|
||||||
FCODE(vfmadd231s)(result, operand2, operand3);
|
FCODE(vfmadd231s)(result, operand2, operand3);
|
||||||
|
|
||||||
code.movaps(tmp, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
|
code.movaps(tmp, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
|
||||||
code.andps(tmp, result);
|
code.andps(tmp, result);
|
||||||
FCODE(ucomis)(tmp, code.MConst(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal));
|
FCODE(ucomis)(tmp, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal));
|
||||||
code.jz(fallback, code.T_NEAR);
|
code.jz(fallback, code.T_NEAR);
|
||||||
code.L(end);
|
code.L(end);
|
||||||
|
|
||||||
|
@ -758,12 +757,12 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.movaps(result, op1);
|
code.movaps(result, op1);
|
||||||
code.xorps(result, op2);
|
code.xorps(result, op2);
|
||||||
}
|
}
|
||||||
code.andps(result, code.MConst(xword, FP::FPInfo<FPT>::sign_mask));
|
code.andps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::sign_mask));
|
||||||
code.orps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
|
code.orps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
code.L(op_are_nans);
|
code.L(op_are_nans);
|
||||||
if (do_default_nan) {
|
if (do_default_nan) {
|
||||||
code.movaps(result, code.MConst(xword, FP::FPInfo<FPT>::DefaultNaN()));
|
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
} else {
|
} else {
|
||||||
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
|
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
|
||||||
|
@ -864,7 +863,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
|
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
|
||||||
FCODE(vfnmadd231s)(result, operand1, operand2);
|
FCODE(vfnmadd231s)(result, operand1, operand2);
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -878,7 +877,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
|
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
|
||||||
FCODE(vfnmadd231s)(result, operand1, operand2);
|
FCODE(vfnmadd231s)(result, operand1, operand2);
|
||||||
FCODE(ucomis)(result, result);
|
FCODE(ucomis)(result, result);
|
||||||
code.jp(fallback, code.T_NEAR);
|
code.jp(fallback, code.T_NEAR);
|
||||||
|
@ -910,7 +909,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
|
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
|
||||||
FCODE(muls)(operand1, operand2);
|
FCODE(muls)(operand1, operand2);
|
||||||
FCODE(subs)(result, operand1);
|
FCODE(subs)(result, operand1);
|
||||||
|
|
||||||
|
@ -1040,19 +1039,19 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
|
|
||||||
code.movaps(value, operand);
|
code.movaps(value, operand);
|
||||||
|
|
||||||
code.movaps(xmm0, code.MConst(xword, fsize == 32 ? 0xFFFF8000 : 0xFFFF'F000'0000'0000));
|
code.movaps(xmm0, code.XmmBConst<fsize>(xword, fsize == 32 ? 0xFFFF8000 : 0xFFFF'F000'0000'0000));
|
||||||
code.pand(value, xmm0);
|
code.pand(value, xmm0);
|
||||||
code.por(value, code.MConst(xword, fsize == 32 ? 0x00008000 : 0x0000'1000'0000'0000));
|
code.por(value, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00008000 : 0x0000'1000'0000'0000));
|
||||||
|
|
||||||
// Detect NaNs, negatives, zeros, denormals and infinities
|
// Detect NaNs, negatives, zeros, denormals and infinities
|
||||||
FCODE(ucomis)(value, code.MConst(xword, FPT(1) << FP::FPInfo<FPT>::explicit_mantissa_width));
|
FCODE(ucomis)(value, code.XmmBConst<fsize>(xword, FPT(1) << FP::FPInfo<FPT>::explicit_mantissa_width));
|
||||||
code.jna(bad_values, code.T_NEAR);
|
code.jna(bad_values, code.T_NEAR);
|
||||||
|
|
||||||
FCODE(sqrts)(value, value);
|
FCODE(sqrts)(value, value);
|
||||||
ICODE(mov)(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 1>()));
|
ICODE(mov)(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 1>()));
|
||||||
FCODE(divs)(result, value);
|
FCODE(divs)(result, value);
|
||||||
|
|
||||||
ICODE(padd)(result, code.MConst(xword, fsize == 32 ? 0x00004000 : 0x0000'0800'0000'0000));
|
ICODE(padd)(result, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00004000 : 0x0000'0800'0000'0000));
|
||||||
code.pand(result, xmm0);
|
code.pand(result, xmm0);
|
||||||
|
|
||||||
code.L(end);
|
code.L(end);
|
||||||
|
@ -1091,7 +1090,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
}
|
}
|
||||||
|
|
||||||
code.L(default_nan);
|
code.L(default_nan);
|
||||||
code.movd(result, code.MConst(xword, 0x7FC00000));
|
code.movd(result, code.XmmBConst<32>(xword, 0x7FC00000));
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
} else {
|
} else {
|
||||||
Xbyak::Label nan, zero;
|
Xbyak::Label nan, zero;
|
||||||
|
@ -1120,26 +1119,26 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
|
|
||||||
code.L(zero);
|
code.L(zero);
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpor(result, value, code.MConst(xword, 0x7FF0'0000'0000'0000));
|
code.vpor(result, value, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(result, value);
|
code.movaps(result, value);
|
||||||
code.por(result, code.MConst(xword, 0x7FF0'0000'0000'0000));
|
code.por(result, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
|
||||||
}
|
}
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
|
|
||||||
code.L(nan);
|
code.L(nan);
|
||||||
if (!ctx.FPCR().DN()) {
|
if (!ctx.FPCR().DN()) {
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpor(result, operand, code.MConst(xword, 0x0008'0000'0000'0000));
|
code.vpor(result, operand, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(result, operand);
|
code.movaps(result, operand);
|
||||||
code.por(result, code.MConst(xword, 0x0008'0000'0000'0000));
|
code.por(result, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
|
||||||
}
|
}
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
}
|
}
|
||||||
|
|
||||||
code.L(default_nan);
|
code.L(default_nan);
|
||||||
code.movq(result, code.MConst(xword, 0x7FF8'0000'0000'0000));
|
code.movq(result, code.XmmBConst<64>(xword, 0x7FF8'0000'0000'0000));
|
||||||
code.jmp(end, code.T_NEAR);
|
code.jmp(end, code.T_NEAR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1193,9 +1192,9 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.vmovaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>()));
|
code.vmovaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 3>()));
|
||||||
FCODE(vfnmadd231s)(result, operand1, operand2);
|
FCODE(vfnmadd231s)(result, operand1, operand2);
|
||||||
FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>()));
|
FCODE(vmuls)(result, result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, -1, 1>()));
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
return;
|
return;
|
||||||
|
@ -1208,7 +1207,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.vmovaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>()));
|
code.vmovaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 3>()));
|
||||||
FCODE(vfnmadd231s)(result, operand1, operand2);
|
FCODE(vfnmadd231s)(result, operand1, operand2);
|
||||||
|
|
||||||
// Detect if the intermediate result is infinity or NaN or nearly an infinity.
|
// Detect if the intermediate result is infinity or NaN or nearly an infinity.
|
||||||
|
@ -1223,7 +1222,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
|
|
||||||
code.jae(fallback, code.T_NEAR);
|
code.jae(fallback, code.T_NEAR);
|
||||||
|
|
||||||
FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>()));
|
FCODE(vmuls)(result, result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, -1, 1>()));
|
||||||
code.L(end);
|
code.L(end);
|
||||||
|
|
||||||
code.SwitchToFarCode();
|
code.SwitchToFarCode();
|
||||||
|
@ -1252,10 +1251,10 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>()));
|
code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 3>()));
|
||||||
FCODE(muls)(operand1, operand2);
|
FCODE(muls)(operand1, operand2);
|
||||||
FCODE(subs)(result, operand1);
|
FCODE(subs)(result, operand1);
|
||||||
FCODE(muls)(result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>()));
|
FCODE(muls)(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, -1, 1>()));
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, operand1);
|
ctx.reg_alloc.DefineValue(inst, operand1);
|
||||||
return;
|
return;
|
||||||
|
@ -1507,7 +1506,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
if constexpr (fsize == 64) {
|
if constexpr (fsize == 64) {
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
|
const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
|
||||||
code.mulsd(src, code.MConst(xword, scale_factor));
|
code.mulsd(src, code.XmmBConst<64>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!truncating) {
|
if (!truncating) {
|
||||||
|
@ -1516,7 +1515,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
} else {
|
} else {
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
|
const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
|
||||||
code.mulss(src, code.MConst(xword, scale_factor));
|
code.mulss(src, code.XmmBConst<32>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!truncating) {
|
if (!truncating) {
|
||||||
|
@ -1534,7 +1533,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
if (!unsigned_) {
|
if (!unsigned_) {
|
||||||
ZeroIfNaN<64>(code, src, scratch);
|
ZeroIfNaN<64>(code, src, scratch);
|
||||||
|
|
||||||
code.movsd(scratch, code.MConst(xword, f64_max_s64_lim));
|
code.movsd(scratch, code.XmmBConst<64>(xword, f64_max_s64_lim));
|
||||||
code.comisd(scratch, src);
|
code.comisd(scratch, src);
|
||||||
code.jna(saturate_max, code.T_NEAR);
|
code.jna(saturate_max, code.T_NEAR);
|
||||||
code.cvttsd2si(result, src); // 64 bit gpr
|
code.cvttsd2si(result, src); // 64 bit gpr
|
||||||
|
@ -1553,7 +1552,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.pxor(xmm0, xmm0);
|
code.pxor(xmm0, xmm0);
|
||||||
|
|
||||||
code.movaps(scratch, src);
|
code.movaps(scratch, src);
|
||||||
code.subsd(scratch, code.MConst(xword, f64_max_s64_lim));
|
code.subsd(scratch, code.XmmBConst<64>(xword, f64_max_s64_lim));
|
||||||
|
|
||||||
// these both result in zero if src/scratch are NaN
|
// these both result in zero if src/scratch are NaN
|
||||||
code.maxsd(src, xmm0);
|
code.maxsd(src, xmm0);
|
||||||
|
@ -1575,21 +1574,21 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
ZeroIfNaN<64>(code, src, scratch);
|
ZeroIfNaN<64>(code, src, scratch);
|
||||||
code.minsd(src, code.MConst(xword, f64_max_s32));
|
code.minsd(src, code.XmmBConst<64>(xword, f64_max_s32));
|
||||||
// maxsd not required as cvttsd2si results in 0x8000'0000 when out of range
|
// maxsd not required as cvttsd2si results in 0x8000'0000 when out of range
|
||||||
code.cvttsd2si(result.cvt32(), src); // 32 bit gpr
|
code.cvttsd2si(result.cvt32(), src); // 32 bit gpr
|
||||||
} else {
|
} else {
|
||||||
code.pxor(xmm0, xmm0);
|
code.pxor(xmm0, xmm0);
|
||||||
code.maxsd(src, xmm0); // results in a zero if src is NaN
|
code.maxsd(src, xmm0); // results in a zero if src is NaN
|
||||||
code.minsd(src, code.MConst(xword, f64_max_u32));
|
code.minsd(src, code.XmmBConst<64>(xword, f64_max_u32));
|
||||||
code.cvttsd2si(result, src); // 64 bit gpr
|
code.cvttsd2si(result, src); // 64 bit gpr
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
ZeroIfNaN<64>(code, src, scratch);
|
ZeroIfNaN<64>(code, src, scratch);
|
||||||
code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u16 : f64_min_s16));
|
code.maxsd(src, code.XmmBConst<64>(xword, unsigned_ ? f64_min_u16 : f64_min_s16));
|
||||||
code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u16 : f64_max_s16));
|
code.minsd(src, code.XmmBConst<64>(xword, unsigned_ ? f64_max_u16 : f64_max_s16));
|
||||||
code.cvttsd2si(result, src); // 64 bit gpr
|
code.cvttsd2si(result, src); // 64 bit gpr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1714,7 +1713,7 @@ void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
||||||
code.mulss(result, code.MConst(xword, scale_factor));
|
code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1734,7 +1733,7 @@ void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
||||||
code.mulss(result, code.MConst(xword, scale_factor));
|
code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1759,7 +1758,7 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
||||||
code.mulss(result, code.MConst(xword, scale_factor));
|
code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1795,7 +1794,7 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
||||||
code.mulss(result, code.MConst(xword, scale_factor));
|
code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1815,7 +1814,7 @@ void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
||||||
code.mulsd(result, code.MConst(xword, scale_factor));
|
code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1835,7 +1834,7 @@ void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
||||||
code.mulsd(result, code.MConst(xword, scale_factor));
|
code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1853,7 +1852,7 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
||||||
code.mulsd(result, code.MConst(xword, scale_factor));
|
code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1878,7 +1877,7 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
||||||
code.mulsd(to, code.MConst(xword, scale_factor));
|
code.mulsd(to, code.XmmBConst<64>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, to);
|
ctx.reg_alloc.DefineValue(inst, to);
|
||||||
|
@ -1897,7 +1896,7 @@ void EmitX64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
||||||
code.mulsd(result, code.MConst(xword, scale_factor));
|
code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1916,7 +1915,7 @@ void EmitX64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
||||||
code.mulss(result, code.MConst(xword, scale_factor));
|
code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1937,18 +1936,18 @@ void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movq(tmp, from);
|
code.movq(tmp, from);
|
||||||
code.punpckldq(tmp, code.MConst(xword, 0x4530000043300000, 0));
|
code.punpckldq(tmp, code.XmmConst(xword, 0x4530000043300000, 0));
|
||||||
code.subpd(tmp, code.MConst(xword, 0x4330000000000000, 0x4530000000000000));
|
code.subpd(tmp, code.XmmConst(xword, 0x4330000000000000, 0x4530000000000000));
|
||||||
code.pshufd(result, tmp, 0b01001110);
|
code.pshufd(result, tmp, 0b01001110);
|
||||||
code.addpd(result, tmp);
|
code.addpd(result, tmp);
|
||||||
if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) {
|
if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) {
|
||||||
code.pand(result, code.MConst(xword, f64_non_sign_mask));
|
code.pand(result, code.XmmBConst<64>(xword, f64_non_sign_mask));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
|
||||||
code.mulsd(result, code.MConst(xword, scale_factor));
|
code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1992,7 +1991,7 @@ void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
|
||||||
code.mulss(result, code.MConst(xword, scale_factor));
|
code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
|
|
@ -94,8 +94,8 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
// !(b <= a+b) == b > a+b
|
// !(b <= a+b) == b > a+b
|
||||||
code.movdqa(tmp_a, xmm_a);
|
code.movdqa(tmp_a, xmm_a);
|
||||||
code.movdqa(tmp_b, xmm_b);
|
code.movdqa(tmp_b, xmm_b);
|
||||||
code.paddw(tmp_a, code.MConst(xword, 0x80008000));
|
code.paddw(tmp_a, code.XmmBConst<16>(xword, 0x8000));
|
||||||
code.paddw(tmp_b, code.MConst(xword, 0x80008000));
|
code.paddw(tmp_b, code.XmmBConst<16>(xword, 0x8000));
|
||||||
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
|
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
|
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
|
||||||
|
@ -217,8 +217,8 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
// (a >= b) == !(b > a)
|
// (a >= b) == !(b > a)
|
||||||
code.pcmpeqb(ones, ones);
|
code.pcmpeqb(ones, ones);
|
||||||
code.paddw(xmm_a, code.MConst(xword, 0x80008000));
|
code.paddw(xmm_a, code.XmmBConst<16>(xword, 0x8000));
|
||||||
code.paddw(xmm_b, code.MConst(xword, 0x80008000));
|
code.paddw(xmm_b, code.XmmBConst<16>(xword, 0x8000));
|
||||||
code.movdqa(xmm_ge, xmm_b);
|
code.movdqa(xmm_ge, xmm_b);
|
||||||
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
||||||
code.pxor(xmm_ge, ones);
|
code.pxor(xmm_ge, ones);
|
||||||
|
@ -654,7 +654,7 @@ void EmitX64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
// TODO: Optimize with zero-extension detection
|
// TODO: Optimize with zero-extension detection
|
||||||
code.movaps(tmp, code.MConst(xword, 0xFFFFFFFF));
|
code.movaps(tmp, code.XmmBConst<8>(xword, 0xFF));
|
||||||
code.pand(xmm_a, tmp);
|
code.pand(xmm_a, tmp);
|
||||||
code.pand(xmm_b, tmp);
|
code.pand(xmm_b, tmp);
|
||||||
code.psadbw(xmm_a, xmm_b);
|
code.psadbw(xmm_a, xmm_b);
|
||||||
|
|
|
@ -458,7 +458,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const
|
||||||
const u64 shift_matrix = shift_amount < 8
|
const u64 shift_matrix = shift_amount < 8
|
||||||
? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
|
? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
|
||||||
: 0x8080808080808080;
|
: 0x8080808080808080;
|
||||||
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
|
code.gf2p8affineqb(result, code.XmmBConst<64>(xword, shift_matrix), 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -519,7 +519,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst)
|
||||||
|
|
||||||
code.pxor(tmp2, tmp2);
|
code.pxor(tmp2, tmp2);
|
||||||
code.psrlq(result, shift_amount);
|
code.psrlq(result, shift_amount);
|
||||||
code.movdqa(tmp1, code.MConst(xword, sign_bit, sign_bit));
|
code.movdqa(tmp1, code.XmmBConst<64>(xword, sign_bit));
|
||||||
code.pand(tmp1, result);
|
code.pand(tmp1, result);
|
||||||
code.psubq(tmp2, tmp1);
|
code.psubq(tmp2, tmp1);
|
||||||
code.por(result, tmp2);
|
code.por(result, tmp2);
|
||||||
|
@ -571,7 +571,7 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm right_shift = xmm16;
|
const Xbyak::Xmm right_shift = xmm16;
|
||||||
const Xbyak::Xmm tmp = xmm17;
|
const Xbyak::Xmm tmp = xmm17;
|
||||||
|
|
||||||
code.vmovdqa32(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
code.vmovdqa32(tmp, code.XmmBConst<16>(xword, 0x00FF));
|
||||||
code.vpxord(right_shift, right_shift, right_shift);
|
code.vpxord(right_shift, right_shift, right_shift);
|
||||||
code.vpsubw(right_shift, right_shift, left_shift);
|
code.vpsubw(right_shift, right_shift, left_shift);
|
||||||
|
|
||||||
|
@ -606,7 +606,7 @@ void EmitX64::EmitVectorArithmeticVShift32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.vmovdqa(tmp, code.MConst(xword, 0x000000FF000000FF, 0x000000FF000000FF));
|
code.vmovdqa(tmp, code.XmmBConst<32>(xword, 0x000000FF));
|
||||||
code.vpxor(right_shift, right_shift, right_shift);
|
code.vpxor(right_shift, right_shift, right_shift);
|
||||||
code.vpsubd(right_shift, right_shift, left_shift);
|
code.vpsubd(right_shift, right_shift, left_shift);
|
||||||
|
|
||||||
|
@ -637,7 +637,7 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm right_shift = xmm16;
|
const Xbyak::Xmm right_shift = xmm16;
|
||||||
const Xbyak::Xmm tmp = xmm17;
|
const Xbyak::Xmm tmp = xmm17;
|
||||||
|
|
||||||
code.vmovdqa32(tmp, code.MConst(xword, 0x00000000000000FF, 0x00000000000000FF));
|
code.vmovdqa32(tmp, code.XmmBConst<64>(xword, 0x00000000000000FF));
|
||||||
code.vpxorq(right_shift, right_shift, right_shift);
|
code.vpxorq(right_shift, right_shift, right_shift);
|
||||||
code.vpsubq(right_shift, right_shift, left_shift);
|
code.vpsubq(right_shift, right_shift, left_shift);
|
||||||
|
|
||||||
|
@ -925,15 +925,15 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movdqa(tmp1, code.MConst(xword, 0x0101010102020304, 0x0000000000000000));
|
code.movdqa(tmp1, code.XmmConst(xword, 0x0101010102020304, 0x0000000000000000));
|
||||||
code.movdqa(tmp2, tmp1);
|
code.movdqa(tmp2, tmp1);
|
||||||
|
|
||||||
code.pshufb(tmp2, data);
|
code.pshufb(tmp2, data);
|
||||||
code.psrlw(data, 4);
|
code.psrlw(data, 4);
|
||||||
code.pand(data, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F));
|
code.pand(data, code.XmmBConst<8>(xword, 0x0F));
|
||||||
code.pshufb(tmp1, data);
|
code.pshufb(tmp1, data);
|
||||||
|
|
||||||
code.movdqa(data, code.MConst(xword, 0x0404040404040404, 0x0404040404040404));
|
code.movdqa(data, code.XmmBConst<8>(xword, 0x04));
|
||||||
|
|
||||||
code.pcmpeqb(data, tmp1);
|
code.pcmpeqb(data, tmp1);
|
||||||
code.pand(data, tmp2);
|
code.pand(data, tmp2);
|
||||||
|
@ -966,11 +966,11 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.vpcmpeqw(zeros, zeros, zeros);
|
code.vpcmpeqw(zeros, zeros, zeros);
|
||||||
code.vpcmpeqw(tmp, tmp, tmp);
|
code.vpcmpeqw(tmp, tmp, tmp);
|
||||||
code.vpcmpeqw(zeros, zeros, data);
|
code.vpcmpeqw(zeros, zeros, data);
|
||||||
code.vpmullw(data, data, code.MConst(xword, 0xf0d3f0d3f0d3f0d3, 0xf0d3f0d3f0d3f0d3));
|
code.vpmullw(data, data, code.XmmBConst<16>(xword, 0xf0d3));
|
||||||
code.vpsllw(tmp, tmp, 15);
|
code.vpsllw(tmp, tmp, 15);
|
||||||
code.vpsllw(zeros, zeros, 7);
|
code.vpsllw(zeros, zeros, 7);
|
||||||
code.vpsrlw(data, data, 12);
|
code.vpsrlw(data, data, 12);
|
||||||
code.vmovdqa(result, code.MConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01));
|
code.vmovdqa(result, code.XmmConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01));
|
||||||
code.vpor(tmp, tmp, zeros);
|
code.vpor(tmp, tmp, zeros);
|
||||||
code.vpor(data, data, tmp);
|
code.vpor(data, data, tmp);
|
||||||
code.vpshufb(result, result, data);
|
code.vpshufb(result, result, data);
|
||||||
|
@ -1002,11 +1002,11 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.pcmpeqw(zeros, zeros);
|
code.pcmpeqw(zeros, zeros);
|
||||||
code.pcmpeqw(tmp, tmp);
|
code.pcmpeqw(tmp, tmp);
|
||||||
code.pcmpeqw(zeros, data);
|
code.pcmpeqw(zeros, data);
|
||||||
code.pmullw(data, code.MConst(xword, 0xf0d3f0d3f0d3f0d3, 0xf0d3f0d3f0d3f0d3));
|
code.pmullw(data, code.XmmBConst<16>(xword, 0xf0d3));
|
||||||
code.psllw(tmp, 15);
|
code.psllw(tmp, 15);
|
||||||
code.psllw(zeros, 7);
|
code.psllw(zeros, 7);
|
||||||
code.psrlw(data, 12);
|
code.psrlw(data, 12);
|
||||||
code.movdqa(result, code.MConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01));
|
code.movdqa(result, code.XmmConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01));
|
||||||
code.por(tmp, zeros);
|
code.por(tmp, zeros);
|
||||||
code.por(data, tmp);
|
code.por(data, tmp);
|
||||||
code.pshufb(result, data);
|
code.pshufb(result, data);
|
||||||
|
@ -1038,7 +1038,7 @@ void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
code.movdqa(tmp, code.XmmBConst<16>(xword, 0x00FF));
|
||||||
code.pand(lhs, tmp);
|
code.pand(lhs, tmp);
|
||||||
code.pand(rhs, tmp);
|
code.pand(rhs, tmp);
|
||||||
code.packuswb(lhs, rhs);
|
code.packuswb(lhs, rhs);
|
||||||
|
@ -1088,7 +1088,7 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst)
|
||||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
code.movdqa(tmp, code.XmmBConst<16>(xword, 0x00FF));
|
||||||
code.pand(lhs, tmp);
|
code.pand(lhs, tmp);
|
||||||
code.pand(rhs, tmp);
|
code.pand(rhs, tmp);
|
||||||
code.packuswb(lhs, rhs);
|
code.packuswb(lhs, rhs);
|
||||||
|
@ -1423,13 +1423,13 @@ static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Ins
|
||||||
case 8:
|
case 8:
|
||||||
code.pavgb(tmp, a);
|
code.pavgb(tmp, a);
|
||||||
code.pxor(a, b);
|
code.pxor(a, b);
|
||||||
code.pand(a, code.MConst(xword, 0x0101010101010101, 0x0101010101010101));
|
code.pand(a, code.XmmBConst<8>(xword, 0x01));
|
||||||
code.psubb(tmp, a);
|
code.psubb(tmp, a);
|
||||||
break;
|
break;
|
||||||
case 16:
|
case 16:
|
||||||
code.pavgw(tmp, a);
|
code.pavgw(tmp, a);
|
||||||
code.pxor(a, b);
|
code.pxor(a, b);
|
||||||
code.pand(a, code.MConst(xword, 0x0001000100010001, 0x0001000100010001));
|
code.pand(a, code.XmmBConst<16>(xword, 0x0001));
|
||||||
code.psubw(tmp, a);
|
code.psubw(tmp, a);
|
||||||
break;
|
break;
|
||||||
case 32:
|
case 32:
|
||||||
|
@ -1464,7 +1464,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst*
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8: {
|
case 8: {
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp, code.MConst(xword, 0x8080808080808080, 0x8080808080808080));
|
code.movdqa(tmp, code.XmmBConst<8>(xword, 0x80));
|
||||||
code.pxor(a, tmp);
|
code.pxor(a, tmp);
|
||||||
code.pxor(b, tmp);
|
code.pxor(b, tmp);
|
||||||
code.pavgb(b, a);
|
code.pavgb(b, a);
|
||||||
|
@ -1473,7 +1473,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
case 16: {
|
case 16: {
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));
|
code.movdqa(tmp, code.XmmBConst<16>(xword, 0x8000));
|
||||||
code.pxor(a, tmp);
|
code.pxor(a, tmp);
|
||||||
code.pxor(b, tmp);
|
code.pxor(b, tmp);
|
||||||
code.pavgw(b, a);
|
code.pavgw(b, a);
|
||||||
|
@ -1635,13 +1635,13 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.paddb(result, result);
|
code.paddb(result, result);
|
||||||
} else if (code.HasHostFeature(HostFeature::GFNI)) {
|
} else if (code.HasHostFeature(HostFeature::GFNI)) {
|
||||||
const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8);
|
const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8);
|
||||||
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
|
code.gf2p8affineqb(result, code.XmmBConst<64>(xword, shift_matrix), 0);
|
||||||
} else {
|
} else {
|
||||||
const u64 replicand = (0xFFULL << shift_amount) & 0xFF;
|
const u64 replicand = (0xFFULL << shift_amount) & 0xFF;
|
||||||
const u64 mask = mcl::bit::replicate_element<u8, u64>(replicand);
|
const u64 mask = mcl::bit::replicate_element<u8, u64>(replicand);
|
||||||
|
|
||||||
code.psllw(result, shift_amount);
|
code.psllw(result, shift_amount);
|
||||||
code.pand(result, code.MConst(xword, mask, mask));
|
code.pand(result, code.XmmBConst<64>(xword, mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1692,13 +1692,13 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.pxor(result, result);
|
code.pxor(result, result);
|
||||||
} else if (code.HasHostFeature(HostFeature::GFNI)) {
|
} else if (code.HasHostFeature(HostFeature::GFNI)) {
|
||||||
const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8);
|
const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8);
|
||||||
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0);
|
code.gf2p8affineqb(result, code.XmmBConst<64>(xword, shift_matrix), 0);
|
||||||
} else {
|
} else {
|
||||||
const u64 replicand = 0xFEULL >> shift_amount;
|
const u64 replicand = 0xFEULL >> shift_amount;
|
||||||
const u64 mask = mcl::bit::replicate_element<u8, u64>(replicand);
|
const u64 mask = mcl::bit::replicate_element<u8, u64>(replicand);
|
||||||
|
|
||||||
code.psrlw(result, shift_amount);
|
code.psrlw(result, shift_amount);
|
||||||
code.pand(result, code.MConst(xword, mask, mask));
|
code.pand(result, code.XmmConst(xword, mask, mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
@ -1752,7 +1752,7 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm right_shift = xmm16;
|
const Xbyak::Xmm right_shift = xmm16;
|
||||||
const Xbyak::Xmm tmp = xmm17;
|
const Xbyak::Xmm tmp = xmm17;
|
||||||
|
|
||||||
code.vmovdqa32(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
code.vmovdqa32(tmp, code.XmmBConst<16>(xword, 0x00FF));
|
||||||
code.vpxord(right_shift, right_shift, right_shift);
|
code.vpxord(right_shift, right_shift, right_shift);
|
||||||
code.vpsubw(right_shift, right_shift, left_shift);
|
code.vpsubw(right_shift, right_shift, left_shift);
|
||||||
code.vpandd(left_shift, left_shift, tmp);
|
code.vpandd(left_shift, left_shift, tmp);
|
||||||
|
@ -1780,7 +1780,7 @@ void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.vmovdqa(tmp, code.MConst(xword, 0x000000FF000000FF, 0x000000FF000000FF));
|
code.vmovdqa(tmp, code.XmmBConst<32>(xword, 0x000000FF));
|
||||||
code.vpxor(right_shift, right_shift, right_shift);
|
code.vpxor(right_shift, right_shift, right_shift);
|
||||||
code.vpsubd(right_shift, right_shift, left_shift);
|
code.vpsubd(right_shift, right_shift, left_shift);
|
||||||
code.vpand(left_shift, left_shift, tmp);
|
code.vpand(left_shift, left_shift, tmp);
|
||||||
|
@ -1808,7 +1808,7 @@ void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.vmovdqa(tmp, code.MConst(xword, 0x00000000000000FF, 0x00000000000000FF));
|
code.vmovdqa(tmp, code.XmmBConst<64>(xword, 0x00000000000000FF));
|
||||||
code.vpxor(right_shift, right_shift, right_shift);
|
code.vpxor(right_shift, right_shift, right_shift);
|
||||||
code.vpsubq(right_shift, right_shift, left_shift);
|
code.vpsubq(right_shift, right_shift, left_shift);
|
||||||
code.vpand(left_shift, left_shift, tmp);
|
code.vpand(left_shift, left_shift, tmp);
|
||||||
|
@ -1928,7 +1928,7 @@ void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.movdqa(tmp, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
|
|
||||||
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp_b, b);
|
code.movdqa(tmp_b, b);
|
||||||
|
@ -1957,7 +1957,7 @@ void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.vmovdqa(xmm0, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
|
code.vmovdqa(xmm0, code.XmmBConst<64>(xword, 0x8000000000000000));
|
||||||
code.vpsubq(tmp, y, xmm0);
|
code.vpsubq(tmp, y, xmm0);
|
||||||
code.vpsubq(xmm0, x, xmm0);
|
code.vpsubq(xmm0, x, xmm0);
|
||||||
code.vpcmpgtq(xmm0, tmp, xmm0);
|
code.vpcmpgtq(xmm0, tmp, xmm0);
|
||||||
|
@ -2076,7 +2076,7 @@ void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
|
||||||
const Xbyak::Xmm sint_max_plus_one = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm sint_max_plus_one = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(sint_max_plus_one, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.movdqa(sint_max_plus_one, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
|
|
||||||
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp_a, a);
|
code.movdqa(tmp_a, a);
|
||||||
|
@ -2107,7 +2107,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.vmovdqa(xmm0, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
|
code.vmovdqa(xmm0, code.XmmBConst<64>(xword, 0x8000000000000000));
|
||||||
code.vpsubq(tmp, y, xmm0);
|
code.vpsubq(tmp, y, xmm0);
|
||||||
code.vpsubq(xmm0, x, xmm0);
|
code.vpsubq(xmm0, x, xmm0);
|
||||||
code.vpcmpgtq(xmm0, tmp, xmm0);
|
code.vpcmpgtq(xmm0, tmp, xmm0);
|
||||||
|
@ -2136,7 +2136,7 @@ void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.psrlw(tmp_a, 8);
|
code.psrlw(tmp_a, 8);
|
||||||
code.psrlw(tmp_b, 8);
|
code.psrlw(tmp_b, 8);
|
||||||
code.pmullw(tmp_a, tmp_b);
|
code.pmullw(tmp_a, tmp_b);
|
||||||
code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
code.pand(a, code.XmmBConst<16>(xword, 0x00FF));
|
||||||
code.psllw(tmp_a, 8);
|
code.psllw(tmp_a, 8);
|
||||||
code.por(a, tmp_a);
|
code.por(a, tmp_a);
|
||||||
|
|
||||||
|
@ -2238,7 +2238,7 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.pxor(zeros, zeros);
|
code.pxor(zeros, zeros);
|
||||||
code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
code.pand(a, code.XmmBConst<16>(xword, 0x00FF));
|
||||||
code.packuswb(a, zeros);
|
code.packuswb(a, zeros);
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, a);
|
ctx.reg_alloc.DefineValue(inst, a);
|
||||||
|
@ -2522,7 +2522,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
|
||||||
|
|
||||||
code.movdqa(c, a);
|
code.movdqa(c, a);
|
||||||
code.psllq(a, 32);
|
code.psllq(a, 32);
|
||||||
code.movdqa(tmp1, code.MConst(xword, 0x80000000'00000000, 0x80000000'00000000));
|
code.movdqa(tmp1, code.XmmBConst<64>(xword, 0x80000000'00000000));
|
||||||
code.movdqa(tmp2, tmp1);
|
code.movdqa(tmp2, tmp1);
|
||||||
code.pand(tmp1, a);
|
code.pand(tmp1, a);
|
||||||
code.pand(tmp2, c);
|
code.pand(tmp2, c);
|
||||||
|
@ -2674,7 +2674,7 @@ void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, x);
|
ctx.reg_alloc.DefineValue(inst, x);
|
||||||
} else {
|
} else {
|
||||||
const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp3, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.movdqa(tmp3, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
|
|
||||||
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp2, x);
|
code.movdqa(tmp2, x);
|
||||||
|
@ -2759,7 +2759,7 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, x);
|
ctx.reg_alloc.DefineValue(inst, x);
|
||||||
} else {
|
} else {
|
||||||
const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp3, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.movdqa(tmp3, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
|
|
||||||
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(tmp2, tmp1);
|
code.movdqa(tmp2, tmp1);
|
||||||
|
@ -2803,7 +2803,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
Xbyak::Label loop;
|
Xbyak::Label loop;
|
||||||
|
|
||||||
code.pxor(result, result);
|
code.pxor(result, result);
|
||||||
code.movdqa(mask, code.MConst(xword, 0x0101010101010101, 0x0101010101010101));
|
code.movdqa(mask, code.XmmBConst<8>(xword, 0x01));
|
||||||
code.mov(counter, 8);
|
code.mov(counter, 8);
|
||||||
|
|
||||||
code.L(loop);
|
code.L(loop);
|
||||||
|
@ -2847,7 +2847,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst
|
||||||
code.pmovzxbw(xmm_a, xmm_a);
|
code.pmovzxbw(xmm_a, xmm_a);
|
||||||
code.pmovzxbw(xmm_b, xmm_b);
|
code.pmovzxbw(xmm_b, xmm_b);
|
||||||
code.pxor(result, result);
|
code.pxor(result, result);
|
||||||
code.movdqa(mask, code.MConst(xword, 0x0001000100010001, 0x0001000100010001));
|
code.movdqa(mask, code.XmmBConst<16>(xword, 0x0001));
|
||||||
code.mov(counter, 8);
|
code.mov(counter, 8);
|
||||||
|
|
||||||
code.L(loop);
|
code.L(loop);
|
||||||
|
@ -2930,11 +2930,11 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
code.movdqa(high_a, low_a);
|
code.movdqa(high_a, low_a);
|
||||||
code.psrlw(high_a, 4);
|
code.psrlw(high_a, 4);
|
||||||
code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F));
|
code.movdqa(tmp1, code.XmmBConst<8>(xword, 0x0F));
|
||||||
code.pand(high_a, tmp1); // High nibbles
|
code.pand(high_a, tmp1); // High nibbles
|
||||||
code.pand(low_a, tmp1); // Low nibbles
|
code.pand(low_a, tmp1); // Low nibbles
|
||||||
|
|
||||||
code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201));
|
code.movdqa(tmp1, code.XmmConst(xword, 0x0302020102010100, 0x0403030203020201));
|
||||||
code.movdqa(tmp2, tmp1);
|
code.movdqa(tmp2, tmp1);
|
||||||
code.pshufb(tmp1, low_a);
|
code.pshufb(tmp1, low_a);
|
||||||
code.pshufb(tmp2, high_a);
|
code.pshufb(tmp2, high_a);
|
||||||
|
@ -2958,10 +2958,10 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::GFNI)) {
|
if (code.HasHostFeature(HostFeature::GFNI)) {
|
||||||
code.gf2p8affineqb(data, code.MConst(xword, 0x8040201008040201, 0x8040201008040201), 0);
|
code.gf2p8affineqb(data, code.XmmBConst<64>(xword, 0x8040201008040201), 0);
|
||||||
} else {
|
} else {
|
||||||
const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
|
code.movdqa(high_nibble_reg, code.XmmBConst<8>(xword, 0xF0));
|
||||||
code.pand(high_nibble_reg, data);
|
code.pand(high_nibble_reg, data);
|
||||||
code.pxor(data, high_nibble_reg);
|
code.pxor(data, high_nibble_reg);
|
||||||
code.psrld(high_nibble_reg, 4);
|
code.psrld(high_nibble_reg, 4);
|
||||||
|
@ -2969,25 +2969,25 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
|
||||||
if (code.HasHostFeature(HostFeature::SSSE3)) {
|
if (code.HasHostFeature(HostFeature::SSSE3)) {
|
||||||
// High lookup
|
// High lookup
|
||||||
const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(high_reversed_reg, code.MConst(xword, 0xE060A020C0408000, 0xF070B030D0509010));
|
code.movdqa(high_reversed_reg, code.XmmConst(xword, 0xE060A020C0408000, 0xF070B030D0509010));
|
||||||
code.pshufb(high_reversed_reg, data);
|
code.pshufb(high_reversed_reg, data);
|
||||||
|
|
||||||
// Low lookup (low nibble equivalent of the above)
|
// Low lookup (low nibble equivalent of the above)
|
||||||
code.movdqa(data, code.MConst(xword, 0x0E060A020C040800, 0x0F070B030D050901));
|
code.movdqa(data, code.XmmConst(xword, 0x0E060A020C040800, 0x0F070B030D050901));
|
||||||
code.pshufb(data, high_nibble_reg);
|
code.pshufb(data, high_nibble_reg);
|
||||||
code.por(data, high_reversed_reg);
|
code.por(data, high_reversed_reg);
|
||||||
} else {
|
} else {
|
||||||
code.pslld(data, 4);
|
code.pslld(data, 4);
|
||||||
code.por(data, high_nibble_reg);
|
code.por(data, high_nibble_reg);
|
||||||
|
|
||||||
code.movdqa(high_nibble_reg, code.MConst(xword, 0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC));
|
code.movdqa(high_nibble_reg, code.XmmBConst<8>(xword, 0xCC));
|
||||||
code.pand(high_nibble_reg, data);
|
code.pand(high_nibble_reg, data);
|
||||||
code.pxor(data, high_nibble_reg);
|
code.pxor(data, high_nibble_reg);
|
||||||
code.psrld(high_nibble_reg, 2);
|
code.psrld(high_nibble_reg, 2);
|
||||||
code.pslld(data, 2);
|
code.pslld(data, 2);
|
||||||
code.por(data, high_nibble_reg);
|
code.por(data, high_nibble_reg);
|
||||||
|
|
||||||
code.movdqa(high_nibble_reg, code.MConst(xword, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA));
|
code.movdqa(high_nibble_reg, code.XmmBConst<8>(xword, 0xAA));
|
||||||
code.pand(high_nibble_reg, data);
|
code.pand(high_nibble_reg, data);
|
||||||
code.pxor(data, high_nibble_reg);
|
code.pxor(data, high_nibble_reg);
|
||||||
code.psrld(high_nibble_reg, 1);
|
code.psrld(high_nibble_reg, 1);
|
||||||
|
@ -3037,7 +3037,7 @@ void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.paddw(data, temp);
|
code.paddw(data, temp);
|
||||||
|
|
||||||
// Add pairs of 16-bit values into 32-bit lanes
|
// Add pairs of 16-bit values into 32-bit lanes
|
||||||
code.movdqa(temp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001));
|
code.movdqa(temp, code.XmmBConst<16>(xword, 0x0001));
|
||||||
code.pmaddwd(data, temp);
|
code.pmaddwd(data, temp);
|
||||||
|
|
||||||
// Sum adjacent 32-bit lanes
|
// Sum adjacent 32-bit lanes
|
||||||
|
@ -3100,7 +3100,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8: {
|
case 8: {
|
||||||
const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(vec_128, code.MConst(xword, 0x8080808080808080, 0x8080808080808080));
|
code.movdqa(vec_128, code.XmmBConst<8>(xword, 0x80));
|
||||||
|
|
||||||
code.paddb(a, vec_128);
|
code.paddb(a, vec_128);
|
||||||
code.paddb(b, vec_128);
|
code.paddb(b, vec_128);
|
||||||
|
@ -3110,7 +3110,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I
|
||||||
}
|
}
|
||||||
case 16: {
|
case 16: {
|
||||||
const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm();
|
||||||
code.movdqa(vec_32768, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));
|
code.movdqa(vec_32768, code.XmmBConst<16>(xword, 0x8000));
|
||||||
|
|
||||||
code.paddw(a, vec_32768);
|
code.paddw(a, vec_32768);
|
||||||
code.paddw(b, vec_32768);
|
code.paddw(b, vec_32768);
|
||||||
|
@ -3506,7 +3506,7 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.pand(tmp, y);
|
code.pand(tmp, y);
|
||||||
code.pand(sign_correction, x);
|
code.pand(sign_correction, x);
|
||||||
code.paddd(sign_correction, tmp);
|
code.paddd(sign_correction, tmp);
|
||||||
code.pand(sign_correction, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
|
code.pand(sign_correction, code.XmmBConst<32>(xword, 0x7FFFFFFF));
|
||||||
|
|
||||||
// calculate unsigned multiply
|
// calculate unsigned multiply
|
||||||
code.movdqa(tmp, x);
|
code.movdqa(tmp, x);
|
||||||
|
@ -3547,13 +3547,13 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
|
||||||
const Xbyak::Address mask = [esize, &code] {
|
const Xbyak::Address mask = [esize, &code] {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
return code.MConst(xword, 0x8080808080808080, 0x8080808080808080);
|
return code.XmmBConst<8>(xword, 0x80);
|
||||||
case 16:
|
case 16:
|
||||||
return code.MConst(xword, 0x8000800080008000, 0x8000800080008000);
|
return code.XmmBConst<16>(xword, 0x8000);
|
||||||
case 32:
|
case 32:
|
||||||
return code.MConst(xword, 0x8000000080000000, 0x8000000080000000);
|
return code.XmmBConst<32>(xword, 0x80000000);
|
||||||
case 64:
|
case 64:
|
||||||
return code.MConst(xword, 0x8000000000000000, 0x8000000000000000);
|
return code.XmmBConst<64>(xword, 0x8000000000000000);
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
@ -3717,7 +3717,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC
|
||||||
code.vpblendvb(xmm0, tmp, tmp2, xmm0);
|
code.vpblendvb(xmm0, tmp, tmp2, xmm0);
|
||||||
ctx.reg_alloc.Release(tmp2);
|
ctx.reg_alloc.Release(tmp2);
|
||||||
} else {
|
} else {
|
||||||
code.pand(xmm0, code.MConst(xword, 0x8080808080808080, 0x8080808080808080));
|
code.pand(xmm0, code.XmmBConst<8>(xword, 0x80));
|
||||||
code.movdqa(tmp, xmm0);
|
code.movdqa(tmp, xmm0);
|
||||||
code.psrlw(tmp, 7);
|
code.psrlw(tmp, 7);
|
||||||
code.pxor(xmm0, xmm0);
|
code.pxor(xmm0, xmm0);
|
||||||
|
@ -3836,14 +3836,14 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply16(EmitContext& ctx, IR::
|
||||||
code.vpsrlw(lower_tmp, lower_tmp, 15);
|
code.vpsrlw(lower_tmp, lower_tmp, 15);
|
||||||
code.vpaddw(upper_tmp, upper_tmp, upper_tmp);
|
code.vpaddw(upper_tmp, upper_tmp, upper_tmp);
|
||||||
code.vpor(upper_result, upper_tmp, lower_tmp);
|
code.vpor(upper_result, upper_tmp, lower_tmp);
|
||||||
code.vpcmpeqw(upper_tmp, upper_result, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));
|
code.vpcmpeqw(upper_tmp, upper_result, code.XmmBConst<16>(xword, 0x8000));
|
||||||
code.vpxor(upper_result, upper_result, upper_tmp);
|
code.vpxor(upper_result, upper_result, upper_tmp);
|
||||||
} else {
|
} else {
|
||||||
code.paddw(upper_tmp, upper_tmp);
|
code.paddw(upper_tmp, upper_tmp);
|
||||||
code.psrlw(lower_tmp, 15);
|
code.psrlw(lower_tmp, 15);
|
||||||
code.movdqa(upper_result, upper_tmp);
|
code.movdqa(upper_result, upper_tmp);
|
||||||
code.por(upper_result, lower_tmp);
|
code.por(upper_result, lower_tmp);
|
||||||
code.movdqa(upper_tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));
|
code.movdqa(upper_tmp, code.XmmBConst<16>(xword, 0x8000));
|
||||||
code.pcmpeqw(upper_tmp, upper_result);
|
code.pcmpeqw(upper_tmp, upper_result);
|
||||||
code.pxor(upper_result, upper_tmp);
|
code.pxor(upper_result, upper_tmp);
|
||||||
}
|
}
|
||||||
|
@ -3889,7 +3889,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
|
||||||
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
|
||||||
code.vpcmpeqd(mask, upper_result, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.vpcmpeqd(mask, upper_result, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
code.vpxor(upper_result, upper_result, mask);
|
code.vpxor(upper_result, upper_result, mask);
|
||||||
code.pmovmskb(bit, mask);
|
code.pmovmskb(bit, mask);
|
||||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
|
@ -3958,7 +3958,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
|
||||||
if (upper_inst) {
|
if (upper_inst) {
|
||||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
|
||||||
code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.movdqa(tmp, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
code.pcmpeqd(tmp, upper_result);
|
code.pcmpeqd(tmp, upper_result);
|
||||||
code.pxor(upper_result, tmp);
|
code.pxor(upper_result, tmp);
|
||||||
code.pmovmskb(bit, tmp);
|
code.pmovmskb(bit, tmp);
|
||||||
|
@ -3984,10 +3984,10 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx,
|
||||||
code.pmaddwd(x, y);
|
code.pmaddwd(x, y);
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpcmpeqd(y, x, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.vpcmpeqd(y, x, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
code.vpxor(x, x, y);
|
code.vpxor(x, x, y);
|
||||||
} else {
|
} else {
|
||||||
code.movdqa(y, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.movdqa(y, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
code.pcmpeqd(y, x);
|
code.pcmpeqd(y, x);
|
||||||
code.pxor(x, y);
|
code.pxor(x, y);
|
||||||
}
|
}
|
||||||
|
@ -4037,11 +4037,11 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx,
|
||||||
|
|
||||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpcmpeqq(y, x, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
|
code.vpcmpeqq(y, x, code.XmmBConst<64>(xword, 0x8000000000000000));
|
||||||
code.vpxor(x, x, y);
|
code.vpxor(x, x, y);
|
||||||
code.vpmovmskb(bit, y);
|
code.vpmovmskb(bit, y);
|
||||||
} else {
|
} else {
|
||||||
code.movdqa(y, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
|
code.movdqa(y, code.XmmBConst<64>(xword, 0x8000000000000000));
|
||||||
code.pcmpeqd(y, x);
|
code.pcmpeqd(y, x);
|
||||||
code.shufps(y, y, 0b11110101);
|
code.shufps(y, y, 0b11110101);
|
||||||
code.pxor(x, y);
|
code.pxor(x, y);
|
||||||
|
@ -4187,13 +4187,13 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
|
||||||
const Xbyak::Address mask = [esize, &code] {
|
const Xbyak::Address mask = [esize, &code] {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
return code.MConst(xword, 0x8080808080808080, 0x8080808080808080);
|
return code.XmmBConst<8>(xword, 0x80);
|
||||||
case 16:
|
case 16:
|
||||||
return code.MConst(xword, 0x8000800080008000, 0x8000800080008000);
|
return code.XmmBConst<16>(xword, 0x8000);
|
||||||
case 32:
|
case 32:
|
||||||
return code.MConst(xword, 0x8000000080000000, 0x8000000080000000);
|
return code.XmmBConst<32>(xword, 0x80000000);
|
||||||
case 64:
|
case 64:
|
||||||
return code.MConst(xword, 0x8000000000000000, 0x8000000000000000);
|
return code.XmmBConst<64>(xword, 0x8000000000000000);
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
@ -4448,7 +4448,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.Release(xmm_table0_upper);
|
ctx.reg_alloc.Release(xmm_table0_upper);
|
||||||
}
|
}
|
||||||
|
|
||||||
code.paddusb(indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
code.paddusb(indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
||||||
code.pshufb(xmm_table0, indicies);
|
code.pshufb(xmm_table0, indicies);
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, xmm_table0);
|
ctx.reg_alloc.DefineValue(inst, xmm_table0);
|
||||||
|
@ -4467,10 +4467,10 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpaddusb(xmm0, indicies, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
|
code.vpaddusb(xmm0, indicies, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(xmm0, indicies);
|
code.movaps(xmm0, indicies);
|
||||||
code.paddusb(xmm0, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
|
code.paddusb(xmm0, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
|
||||||
}
|
}
|
||||||
code.pshufb(xmm_table0, indicies);
|
code.pshufb(xmm_table0, indicies);
|
||||||
code.pblendvb(xmm_table0, defaults);
|
code.pblendvb(xmm_table0, defaults);
|
||||||
|
@ -4496,12 +4496,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
code.vpaddusb(xmm0, indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(xmm0, indicies);
|
code.movaps(xmm0, indicies);
|
||||||
code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
code.paddusb(xmm0, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
||||||
}
|
}
|
||||||
code.paddusb(indicies, code.MConst(xword, 0x6060606060606060, 0xFFFFFFFFFFFFFFFF));
|
code.paddusb(indicies, code.XmmConst(xword, 0x6060606060606060, 0xFFFFFFFFFFFFFFFF));
|
||||||
code.pshufb(xmm_table0, xmm0);
|
code.pshufb(xmm_table0, xmm0);
|
||||||
code.pshufb(xmm_table1, indicies);
|
code.pshufb(xmm_table1, indicies);
|
||||||
code.pblendvb(xmm_table0, xmm_table1);
|
code.pblendvb(xmm_table0, xmm_table1);
|
||||||
|
@ -4528,19 +4528,19 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
code.vpaddusb(xmm0, indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(xmm0, indicies);
|
code.movaps(xmm0, indicies);
|
||||||
code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
code.paddusb(xmm0, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
|
||||||
}
|
}
|
||||||
code.pshufb(xmm_table0, indicies);
|
code.pshufb(xmm_table0, indicies);
|
||||||
code.pshufb(xmm_table1, indicies);
|
code.pshufb(xmm_table1, indicies);
|
||||||
code.pblendvb(xmm_table0, xmm_table1);
|
code.pblendvb(xmm_table0, xmm_table1);
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpaddusb(xmm0, indicies, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
|
code.vpaddusb(xmm0, indicies, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(xmm0, indicies);
|
code.movaps(xmm0, indicies);
|
||||||
code.paddusb(xmm0, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
|
code.paddusb(xmm0, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
|
||||||
}
|
}
|
||||||
code.pblendvb(xmm_table0, defaults);
|
code.pblendvb(xmm_table0, defaults);
|
||||||
|
|
||||||
|
@ -4605,7 +4605,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
|
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
|
||||||
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
|
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
|
||||||
|
|
||||||
code.vptestnmb(write_mask, indicies, code.MConst(xword, 0xE0E0E0E0E0E0E0E0, 0xE0E0E0E0E0E0E0E0));
|
code.vptestnmb(write_mask, indicies, code.XmmBConst<8>(xword, 0xE0));
|
||||||
code.vpermi2b(indicies | write_mask, xmm_table0, xmm_table1);
|
code.vpermi2b(indicies | write_mask, xmm_table0, xmm_table1);
|
||||||
|
|
||||||
ctx.reg_alloc.Release(xmm_table0);
|
ctx.reg_alloc.Release(xmm_table0);
|
||||||
|
@ -4619,7 +4619,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
// Handle vector-table 2,3
|
// Handle vector-table 2,3
|
||||||
// vpcmpuble
|
// vpcmpuble
|
||||||
code.vpcmpub(upper_mask, indicies, code.MConst(xword, 0x3F3F3F3F3F3F3F3F, 0x3F3F3F3F3F3F3F3F), CmpInt::LessEqual);
|
code.vpcmpub(upper_mask, indicies, code.XmmBConst<8>(xword, 0x3F), CmpInt::LessEqual);
|
||||||
code.kandnw(write_mask, write_mask, upper_mask);
|
code.kandnw(write_mask, write_mask, upper_mask);
|
||||||
|
|
||||||
const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseScratchXmm(table[2]);
|
const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseScratchXmm(table[2]);
|
||||||
|
@ -4639,7 +4639,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
|
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
|
||||||
const Xbyak::Opmask write_mask = k1;
|
const Xbyak::Opmask write_mask = k1;
|
||||||
|
|
||||||
code.vptestnmb(write_mask, indicies, code.MConst(xword, 0xE0E0E0E0E0E0E0E0, 0xE0E0E0E0E0E0E0E0));
|
code.vptestnmb(write_mask, indicies, code.XmmBConst<8>(xword, 0xE0));
|
||||||
code.vpermi2b(indicies, xmm_table0, xmm_table1);
|
code.vpermi2b(indicies, xmm_table0, xmm_table1);
|
||||||
|
|
||||||
if (is_defaults_zero) {
|
if (is_defaults_zero) {
|
||||||
|
@ -4656,7 +4656,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]);
|
const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]);
|
||||||
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
|
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
|
||||||
|
|
||||||
code.paddusb(indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
|
code.paddusb(indicies, code.XmmBConst<8>(xword, 0x70));
|
||||||
code.pshufb(xmm_table0, indicies);
|
code.pshufb(xmm_table0, indicies);
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, xmm_table0);
|
ctx.reg_alloc.DefineValue(inst, xmm_table0);
|
||||||
|
@ -4669,10 +4669,10 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
|
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
|
code.vpaddusb(xmm0, indicies, code.XmmBConst<8>(xword, 0x70));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(xmm0, indicies);
|
code.movaps(xmm0, indicies);
|
||||||
code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
|
code.paddusb(xmm0, code.XmmBConst<8>(xword, 0x70));
|
||||||
}
|
}
|
||||||
code.pshufb(xmm_table0, indicies);
|
code.pshufb(xmm_table0, indicies);
|
||||||
code.pblendvb(xmm_table0, defaults);
|
code.pblendvb(xmm_table0, defaults);
|
||||||
|
@ -4687,12 +4687,12 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
|
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
|
code.vpaddusb(xmm0, indicies, code.XmmBConst<8>(xword, 0x70));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(xmm0, indicies);
|
code.movaps(xmm0, indicies);
|
||||||
code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0x7070707070707070));
|
code.paddusb(xmm0, code.XmmBConst<8>(xword, 0x70));
|
||||||
}
|
}
|
||||||
code.paddusb(indicies, code.MConst(xword, 0x6060606060606060, 0x6060606060606060));
|
code.paddusb(indicies, code.XmmBConst<8>(xword, 0x60));
|
||||||
code.pshufb(xmm_table0, xmm0);
|
code.pshufb(xmm_table0, xmm0);
|
||||||
code.pshufb(xmm_table1, indicies);
|
code.pshufb(xmm_table1, indicies);
|
||||||
code.pblendvb(xmm_table0, xmm_table1);
|
code.pblendvb(xmm_table0, xmm_table1);
|
||||||
|
@ -4706,14 +4706,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm masked = xmm16;
|
const Xbyak::Xmm masked = xmm16;
|
||||||
|
|
||||||
code.vpandd(masked, indicies, code.MConst(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
|
code.vpandd(masked, indicies, code.XmmBConst<8>(xword_b, 0xF0));
|
||||||
|
|
||||||
for (size_t i = 0; i < table_size; ++i) {
|
for (size_t i = 0; i < table_size; ++i) {
|
||||||
const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]);
|
const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]);
|
||||||
const Xbyak::Opmask table_mask = k1;
|
const Xbyak::Opmask table_mask = k1;
|
||||||
const u64 table_index = mcl::bit::replicate_element<u8, u64>(i * 16);
|
const u8 table_index = u8(i * 16);
|
||||||
|
|
||||||
code.vpcmpeqb(table_mask, masked, code.MConst(xword, table_index, table_index));
|
code.vpcmpeqb(table_mask, masked, code.XmmBConst<8>(xword, i * 16));
|
||||||
|
|
||||||
if (table_index == 0 && is_defaults_zero) {
|
if (table_index == 0 && is_defaults_zero) {
|
||||||
code.vpshufb(result | table_mask | T_z, xmm_table, indicies);
|
code.vpshufb(result | table_mask | T_z, xmm_table, indicies);
|
||||||
|
@ -4733,21 +4733,21 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code.movaps(masked, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
|
code.movaps(masked, code.XmmBConst<8>(xword, 0xF0));
|
||||||
code.pand(masked, indicies);
|
code.pand(masked, indicies);
|
||||||
|
|
||||||
for (size_t i = 0; i < table_size; ++i) {
|
for (size_t i = 0; i < table_size; ++i) {
|
||||||
const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]);
|
const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]);
|
||||||
|
|
||||||
const u64 table_index = mcl::bit::replicate_element<u8, u64>(i * 16);
|
const u8 table_index = u8(i * 16);
|
||||||
|
|
||||||
if (table_index == 0) {
|
if (table_index == 0) {
|
||||||
code.pxor(xmm0, xmm0);
|
code.pxor(xmm0, xmm0);
|
||||||
code.pcmpeqb(xmm0, masked);
|
code.pcmpeqb(xmm0, masked);
|
||||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
code.vpcmpeqb(xmm0, masked, code.MConst(xword, table_index, table_index));
|
code.vpcmpeqb(xmm0, masked, code.XmmBConst<8>(xword, table_index));
|
||||||
} else {
|
} else {
|
||||||
code.movaps(xmm0, code.MConst(xword, table_index, table_index));
|
code.movaps(xmm0, code.XmmBConst<8>(xword, table_index));
|
||||||
code.pcmpeqb(xmm0, masked);
|
code.pcmpeqb(xmm0, masked);
|
||||||
}
|
}
|
||||||
code.pshufb(xmm_table, indicies);
|
code.pshufb(xmm_table, indicies);
|
||||||
|
@ -4805,11 +4805,11 @@ void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const bool part = args[2].GetImmediateU1();
|
const bool part = args[2].GetImmediateU1();
|
||||||
|
|
||||||
if (!part) {
|
if (!part) {
|
||||||
code.pand(lower, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
code.pand(lower, code.XmmBConst<16>(xword, 0x00FF));
|
||||||
code.psllw(upper, 8);
|
code.psllw(upper, 8);
|
||||||
} else {
|
} else {
|
||||||
code.psrlw(lower, 8);
|
code.psrlw(lower, 8);
|
||||||
code.pand(upper, code.MConst(xword, 0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00));
|
code.pand(upper, code.XmmBConst<16>(xword, 0xFF00));
|
||||||
}
|
}
|
||||||
code.por(lower, upper);
|
code.por(lower, upper);
|
||||||
|
|
||||||
|
@ -4824,11 +4824,11 @@ void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const bool part = args[2].GetImmediateU1();
|
const bool part = args[2].GetImmediateU1();
|
||||||
|
|
||||||
if (!part) {
|
if (!part) {
|
||||||
code.pand(lower, code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF));
|
code.pand(lower, code.XmmBConst<32>(xword, 0x0000FFFF));
|
||||||
code.pslld(upper, 16);
|
code.pslld(upper, 16);
|
||||||
} else {
|
} else {
|
||||||
code.psrld(lower, 16);
|
code.psrld(lower, 16);
|
||||||
code.pand(upper, code.MConst(xword, 0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000));
|
code.pand(upper, code.XmmBConst<32>(xword, 0xFFFF0000));
|
||||||
}
|
}
|
||||||
code.por(lower, upper);
|
code.por(lower, upper);
|
||||||
|
|
||||||
|
@ -4899,7 +4899,7 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx,
|
||||||
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
code.movdqa(temp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
code.movdqa(temp, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
code.pxor(x, temp);
|
code.pxor(x, temp);
|
||||||
code.pxor(y, temp);
|
code.pxor(y, temp);
|
||||||
code.movdqa(temp, x);
|
code.movdqa(temp, x);
|
||||||
|
|
|
@ -146,20 +146,12 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
|
||||||
|
|
||||||
template<size_t fsize>
|
template<size_t fsize>
|
||||||
Xbyak::Address GetVectorOf(BlockOfCode& code, u64 value) {
|
Xbyak::Address GetVectorOf(BlockOfCode& code, u64 value) {
|
||||||
if constexpr (fsize == 32) {
|
return code.XmmBConst<fsize>(xword, value);
|
||||||
return code.MConst(xword, (value << 32) | value, (value << 32) | value);
|
|
||||||
} else {
|
|
||||||
return code.MConst(xword, value, value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<size_t fsize, u64 value>
|
template<size_t fsize, u64 value>
|
||||||
Xbyak::Address GetVectorOf(BlockOfCode& code) {
|
Xbyak::Address GetVectorOf(BlockOfCode& code) {
|
||||||
if constexpr (fsize == 32) {
|
return code.XmmBConst<fsize>(xword, value);
|
||||||
return code.MConst(xword, (value << 32) | value, (value << 32) | value);
|
|
||||||
} else {
|
|
||||||
return code.MConst(xword, value, value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<size_t fsize>
|
template<size_t fsize>
|
||||||
|
@ -215,7 +207,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||||
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
||||||
FpFixup::PosZero);
|
FpFixup::PosZero);
|
||||||
FCODE(vfixupimmp)(result, result, code.MConst(ptr_b, u64(nan_to_zero)), u8(0));
|
FCODE(vfixupimmp)(result, result, code.XmmBConst<32>(ptr_b, nan_to_zero), u8(0));
|
||||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
FCODE(vcmpordp)(nan_mask, result, result);
|
FCODE(vcmpordp)(nan_mask, result, result);
|
||||||
FCODE(vandp)(result, result, nan_mask);
|
FCODE(vandp)(result, result, nan_mask);
|
||||||
|
@ -239,9 +231,8 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xb
|
||||||
FpFixup::Norm_Src,
|
FpFixup::Norm_Src,
|
||||||
FpFixup::Norm_Src,
|
FpFixup::Norm_Src,
|
||||||
FpFixup::Norm_Src);
|
FpFixup::Norm_Src);
|
||||||
constexpr u64 denormal_to_zero64 = mcl::bit::replicate_element<fsize, u64>(denormal_to_zero);
|
|
||||||
|
|
||||||
FCODE(vmovap)(tmp, code.MConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64)));
|
FCODE(vmovap)(tmp, code.XmmBConst<fsize>(xword, denormal_to_zero));
|
||||||
|
|
||||||
for (const Xbyak::Xmm& xmm : to_daz) {
|
for (const Xbyak::Xmm& xmm : to_daz) {
|
||||||
FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
|
FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
|
||||||
|
@ -589,12 +580,11 @@ template<size_t fsize>
|
||||||
void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
using FPT = mcl::unsigned_integer_of_size<fsize>;
|
using FPT = mcl::unsigned_integer_of_size<fsize>;
|
||||||
constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);
|
constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);
|
||||||
constexpr u64 non_sign_mask64 = mcl::bit::replicate_element<fsize, u64>(non_sign_mask);
|
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Address mask = code.MConst(xword, non_sign_mask64, non_sign_mask64);
|
const Xbyak::Address mask = code.XmmBConst<fsize>(xword, non_sign_mask);
|
||||||
|
|
||||||
code.andps(a, mask);
|
code.andps(a, mask);
|
||||||
|
|
||||||
|
@ -788,9 +778,9 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
|
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
|
||||||
code.vcvtudq2ps(xmm, xmm);
|
code.vcvtudq2ps(xmm, xmm);
|
||||||
} else {
|
} else {
|
||||||
const Xbyak::Address mem_4B000000 = code.MConst(xword, 0x4B0000004B000000, 0x4B0000004B000000);
|
const Xbyak::Address mem_4B000000 = code.XmmBConst<32>(xword, 0x4B000000);
|
||||||
const Xbyak::Address mem_53000000 = code.MConst(xword, 0x5300000053000000, 0x5300000053000000);
|
const Xbyak::Address mem_53000000 = code.XmmBConst<32>(xword, 0x53000000);
|
||||||
const Xbyak::Address mem_D3000080 = code.MConst(xword, 0xD3000080D3000080, 0xD3000080D3000080);
|
const Xbyak::Address mem_D3000080 = code.XmmBConst<32>(xword, 0xD3000080);
|
||||||
|
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
@ -801,7 +791,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
|
||||||
code.vaddps(xmm, xmm, mem_D3000080);
|
code.vaddps(xmm, xmm, mem_D3000080);
|
||||||
code.vaddps(xmm, tmp, xmm);
|
code.vaddps(xmm, tmp, xmm);
|
||||||
} else {
|
} else {
|
||||||
const Xbyak::Address mem_0xFFFF = code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF);
|
const Xbyak::Address mem_0xFFFF = code.XmmBConst<32>(xword, 0x0000FFFF);
|
||||||
|
|
||||||
code.movdqa(tmp, mem_0xFFFF);
|
code.movdqa(tmp, mem_0xFFFF);
|
||||||
|
|
||||||
|
@ -819,7 +809,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
|
if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
|
||||||
code.pand(xmm, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
|
code.pand(xmm, code.XmmBConst<32>(xword, 0x7FFFFFFF));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -838,8 +828,8 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||||
code.vcvtuqq2pd(xmm, xmm);
|
code.vcvtuqq2pd(xmm, xmm);
|
||||||
} else {
|
} else {
|
||||||
const Xbyak::Address unpack = code.MConst(xword, 0x4530000043300000, 0);
|
const Xbyak::Address unpack = code.XmmConst(xword, 0x4530000043300000, 0);
|
||||||
const Xbyak::Address subtrahend = code.MConst(xword, 0x4330000000000000, 0x4530000000000000);
|
const Xbyak::Address subtrahend = code.XmmConst(xword, 0x4330000000000000, 0x4530000000000000);
|
||||||
|
|
||||||
const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
|
||||||
|
@ -886,7 +876,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
|
if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
|
||||||
code.pand(xmm, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
|
code.pand(xmm, code.XmmBConst<64>(xword, 0x7FFFFFFFFFFFFFFF));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1255,12 +1245,11 @@ template<size_t fsize>
|
||||||
void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
using FPT = mcl::unsigned_integer_of_size<fsize>;
|
using FPT = mcl::unsigned_integer_of_size<fsize>;
|
||||||
constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
|
constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
|
||||||
constexpr u64 sign_mask64 = mcl::bit::replicate_element<fsize, u64>(sign_mask);
|
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Address mask = code.MConst(xword, sign_mask64, sign_mask64);
|
const Xbyak::Address mask = code.XmmBConst<fsize>(xword, sign_mask);
|
||||||
|
|
||||||
code.xorps(a, mask);
|
code.xorps(a, mask);
|
||||||
|
|
||||||
|
|
|
@ -72,7 +72,7 @@ enum class Op {
|
||||||
template<Op op, size_t esize>
|
template<Op op, size_t esize>
|
||||||
void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
static_assert(esize == 32 || esize == 64);
|
static_assert(esize == 32 || esize == 64);
|
||||||
constexpr u64 msb_mask = esize == 32 ? 0x8000000080000000 : 0x8000000000000000;
|
constexpr u64 msb_mask = esize == 32 ? 0x80000000 : 0x8000000000000000;
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
@ -97,7 +97,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
code.vpmovq2m(k1, xmm0);
|
code.vpmovq2m(k1, xmm0);
|
||||||
}
|
}
|
||||||
ICODE(vpsra)(result | k1, result, u8(esize - 1));
|
ICODE(vpsra)(result | k1, result, u8(esize - 1));
|
||||||
ICODE(vpxor)(result | k1, result, code.MConst(xword_b, msb_mask, msb_mask));
|
ICODE(vpxor)(result | k1, result, code.XmmBConst<esize>(xword_b, msb_mask));
|
||||||
|
|
||||||
code.ktestb(k1, k1);
|
code.ktestb(k1, k1);
|
||||||
code.setnz(overflow);
|
code.setnz(overflow);
|
||||||
|
@ -148,10 +148,10 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
if constexpr (esize == 64) {
|
if constexpr (esize == 64) {
|
||||||
code.pshufd(tmp, tmp, 0b11110101);
|
code.pshufd(tmp, tmp, 0b11110101);
|
||||||
}
|
}
|
||||||
code.pxor(tmp, code.MConst(xword, msb_mask, msb_mask));
|
code.pxor(tmp, code.XmmBConst<esize>(xword, msb_mask));
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::SSE41)) {
|
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||||
code.ptest(xmm0, code.MConst(xword, msb_mask, msb_mask));
|
code.ptest(xmm0, code.XmmBConst<esize>(xword, msb_mask));
|
||||||
} else {
|
} else {
|
||||||
FCODE(movmskp)(overflow.cvt32(), xmm0);
|
FCODE(movmskp)(overflow.cvt32(), xmm0);
|
||||||
code.test(overflow.cvt32(), overflow.cvt32());
|
code.test(overflow.cvt32(), overflow.cvt32());
|
||||||
|
|
|
@ -541,7 +541,7 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
|
||||||
if (imm_value == 0) {
|
if (imm_value == 0) {
|
||||||
MAYBE_AVX(xorps, reg, reg);
|
MAYBE_AVX(xorps, reg, reg);
|
||||||
} else {
|
} else {
|
||||||
MAYBE_AVX(movaps, reg, code.MConst(code.xword, imm_value));
|
MAYBE_AVX(movaps, reg, code.XmmBConst<64>(code.xword, imm_value));
|
||||||
}
|
}
|
||||||
return host_loc;
|
return host_loc;
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,4 +17,12 @@ inline void Or(volatile u32* ptr, u32 value) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void And(volatile u32* ptr, u32 value) {
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
_InterlockedAnd(reinterpret_cast<volatile long*>(ptr), value);
|
||||||
|
#else
|
||||||
|
__atomic_and_fetch(ptr, value, __ATOMIC_SEQ_CST);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Dynarmic::Atomic
|
} // namespace Dynarmic::Atomic
|
||||||
|
|
|
@ -57,10 +57,15 @@ public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stops execution in Jit::Run.
|
* Stops execution in Jit::Run.
|
||||||
* Can only be called from a callback.
|
|
||||||
*/
|
*/
|
||||||
void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
|
void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clears a halt reason from flags.
|
||||||
|
* Warning: Only use this if you're sure this won't introduce races.
|
||||||
|
*/
|
||||||
|
void ClearHalt(HaltReason hr = HaltReason::UserDefined1);
|
||||||
|
|
||||||
/// View and modify registers.
|
/// View and modify registers.
|
||||||
std::array<std::uint32_t, 16>& Regs();
|
std::array<std::uint32_t, 16>& Regs();
|
||||||
const std::array<std::uint32_t, 16>& Regs() const;
|
const std::array<std::uint32_t, 16>& Regs() const;
|
||||||
|
|
|
@ -58,10 +58,15 @@ public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stops execution in Jit::Run.
|
* Stops execution in Jit::Run.
|
||||||
* Can only be called from a callback.
|
|
||||||
*/
|
*/
|
||||||
void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
|
void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clears a halt reason from flags.
|
||||||
|
* Warning: Only use this if you're sure this won't introduce races.
|
||||||
|
*/
|
||||||
|
void ClearHalt(HaltReason hr = HaltReason::UserDefined1);
|
||||||
|
|
||||||
/// Read Stack Pointer
|
/// Read Stack Pointer
|
||||||
std::uint64_t GetSP() const;
|
std::uint64_t GetSP() const;
|
||||||
/// Modify Stack Pointer
|
/// Modify Stack Pointer
|
||||||
|
|
113
externals/dynarmic/tests/A64/test_invalidation.cpp
vendored
Executable file
113
externals/dynarmic/tests/A64/test_invalidation.cpp
vendored
Executable file
|
@ -0,0 +1,113 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2018 MerryMage
|
||||||
|
* SPDX-License-Identifier: 0BSD
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <catch2/catch.hpp>
|
||||||
|
|
||||||
|
#include "./testenv.h"
|
||||||
|
#include "dynarmic/interface/A64/a64.h"
|
||||||
|
|
||||||
|
using namespace Dynarmic;
|
||||||
|
|
||||||
|
TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have any patching requirements", "[a64]") {
|
||||||
|
A64TestEnv env;
|
||||||
|
|
||||||
|
A64::UserConfig conf{&env};
|
||||||
|
A64::Jit jit{conf};
|
||||||
|
|
||||||
|
REQUIRE(conf.HasOptimization(OptimizationFlag::FastDispatch));
|
||||||
|
|
||||||
|
env.code_mem_start_address = 100;
|
||||||
|
env.code_mem.clear();
|
||||||
|
env.code_mem.emplace_back(0xd2800d80); // MOV X0, 108
|
||||||
|
env.code_mem.emplace_back(0xd61f0000); // BR X0
|
||||||
|
env.code_mem.emplace_back(0xd2800540); // MOV X0, 42
|
||||||
|
env.code_mem.emplace_back(0x14000000); // B .
|
||||||
|
|
||||||
|
jit.SetPC(100);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 42);
|
||||||
|
|
||||||
|
jit.SetPC(100);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 42);
|
||||||
|
|
||||||
|
jit.InvalidateCacheRange(108, 4);
|
||||||
|
|
||||||
|
jit.SetPC(100);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 42);
|
||||||
|
|
||||||
|
env.code_mem[2] = 0xd28008a0; // MOV X0, 69
|
||||||
|
|
||||||
|
jit.SetPC(100);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 42);
|
||||||
|
|
||||||
|
jit.InvalidateCacheRange(108, 4);
|
||||||
|
|
||||||
|
jit.SetPC(100);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 69);
|
||||||
|
|
||||||
|
jit.SetPC(100);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 69);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have any patching requirements 2", "[a64]") {
|
||||||
|
A64TestEnv env;
|
||||||
|
|
||||||
|
A64::UserConfig conf{&env};
|
||||||
|
A64::Jit jit{conf};
|
||||||
|
|
||||||
|
REQUIRE(conf.HasOptimization(OptimizationFlag::FastDispatch));
|
||||||
|
|
||||||
|
env.code_mem.emplace_back(0xd2800100); // MOV X0, 8
|
||||||
|
env.code_mem.emplace_back(0xd61f0000); // BR X0
|
||||||
|
env.code_mem.emplace_back(0xd2800540); // MOV X0, 42
|
||||||
|
env.code_mem.emplace_back(0x14000000); // B .
|
||||||
|
|
||||||
|
jit.SetPC(0);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 42);
|
||||||
|
|
||||||
|
jit.SetPC(0);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 42);
|
||||||
|
|
||||||
|
jit.InvalidateCacheRange(8, 4);
|
||||||
|
|
||||||
|
jit.SetPC(0);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 42);
|
||||||
|
|
||||||
|
env.code_mem[2] = 0xd28008a0; // MOV X0, 69
|
||||||
|
|
||||||
|
jit.SetPC(0);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 42);
|
||||||
|
|
||||||
|
jit.InvalidateCacheRange(8, 4);
|
||||||
|
|
||||||
|
jit.SetPC(0);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 69);
|
||||||
|
|
||||||
|
jit.SetPC(0);
|
||||||
|
env.ticks_left = 4;
|
||||||
|
jit.Run();
|
||||||
|
REQUIRE(jit.GetRegister(0) == 69);
|
||||||
|
}
|
3
externals/dynarmic/tests/CMakeLists.txt
vendored
3
externals/dynarmic/tests/CMakeLists.txt
vendored
|
@ -20,6 +20,8 @@ endif()
|
||||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||||
target_sources(dynarmic_tests PRIVATE
|
target_sources(dynarmic_tests PRIVATE
|
||||||
A64/a64.cpp
|
A64/a64.cpp
|
||||||
|
A64/misaligned_page_table.cpp
|
||||||
|
A64/test_invalidation.cpp
|
||||||
A64/testenv.h
|
A64/testenv.h
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
@ -44,7 +46,6 @@ if (DYNARMIC_TESTS_USE_UNICORN)
|
||||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||||
target_sources(dynarmic_tests PRIVATE
|
target_sources(dynarmic_tests PRIVATE
|
||||||
A64/fuzz_with_unicorn.cpp
|
A64/fuzz_with_unicorn.cpp
|
||||||
A64/misaligned_page_table.cpp
|
|
||||||
A64/verify_unicorn.cpp
|
A64/verify_unicorn.cpp
|
||||||
unicorn_emu/a64_unicorn.cpp
|
unicorn_emu/a64_unicorn.cpp
|
||||||
unicorn_emu/a64_unicorn.h
|
unicorn_emu/a64_unicorn.h
|
||||||
|
|
Loading…
Reference in a new issue