early-access version 2829

This commit is contained in:
pineappleEA 2022-07-12 16:46:34 +02:00
parent aae4e12805
commit 3df4ab9726
71 changed files with 17304 additions and 384 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 2828. This is the source code for early-access 2829.
## Legal Notice ## Legal Notice

View file

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.8) cmake_minimum_required(VERSION 3.8)
project(dynarmic LANGUAGES C CXX ASM VERSION 6.0.1) project(dynarmic LANGUAGES C CXX ASM VERSION 6.1.1)
# Determine if we're built as a subproject (using add_subdirectory) # Determine if we're built as a subproject (using add_subdirectory)
# or if this is the master project. # or if this is the master project.
@ -131,12 +131,6 @@ if (DYNARMIC_NO_BUNDLED_ROBIN_MAP AND NOT TARGET tsl::robin_map)
find_package(tsl-robin-map REQUIRED) find_package(tsl-robin-map REQUIRED)
endif() endif()
if (DYNARMIC_NO_BUNDLED_VIXL AND ARCHITECTURE STREQUAL "arm64")
find_package(PkgConfig REQUIRED)
pkg_check_modules(vixl REQUIRED IMPORTED_TARGET vixl)
add_library(vixl ALIAS PkgConfig::vixl)
endif()
if (DYNARMIC_NO_BUNDLED_XBYAK AND NOT TARGET xbyak) if (DYNARMIC_NO_BUNDLED_XBYAK AND NOT TARGET xbyak)
if (ARCHITECTURE STREQUAL "x86" OR ARCHITECTURE STREQUAL "x86_64") if (ARCHITECTURE STREQUAL "x86" OR ARCHITECTURE STREQUAL "x86_64")
find_package(xbyak REQUIRED) find_package(xbyak REQUIRED)

View file

@ -28,18 +28,20 @@ if (NOT TARGET merry::mcl)
add_subdirectory(mcl) add_subdirectory(mcl)
endif() endif()
# oaknut
if (NOT TARGET merry::oaknut)
if (ARCHITECTURE STREQUAL "arm64")
add_subdirectory(oaknut)
endif()
endif()
# robin-map # robin-map
if (NOT TARGET tsl::robin_map) if (NOT TARGET tsl::robin_map)
add_subdirectory(robin-map) add_subdirectory(robin-map)
endif() endif()
# vixl
if (ARCHITECTURE STREQUAL "arm64" AND NOT TARGET vixl)
add_subdirectory(vixl EXCLUDE_FROM_ALL)
endif()
# xbyak # xbyak
if (NOT TARGET xbyak) if (NOT TARGET xbyak)

View file

@ -5,8 +5,8 @@ This repository uses subtrees to manage some of its externals.
``` ```
git remote add externals-fmt https://github.com/fmtlib/fmt.git --no-tags git remote add externals-fmt https://github.com/fmtlib/fmt.git --no-tags
git remote add externals-mcl https://github.com/merryhime/mcl.git --no-tags git remote add externals-mcl https://github.com/merryhime/mcl.git --no-tags
git remote add externals-oaknut https://github.com/merryhime/oaknut.git --no-tags
git remote add externals-robin-map https://github.com/Tessil/robin-map.git --no-tags git remote add externals-robin-map https://github.com/Tessil/robin-map.git --no-tags
git remote add externals-vixl https://git.linaro.org/arm/vixl.git --no-tags
git remote add externals-xbyak https://github.com/herumi/xbyak.git --no-tags git remote add externals-xbyak https://github.com/herumi/xbyak.git --no-tags
git remote add externals-zycore https://github.com/zyantific/zycore-c.git --no-tags git remote add externals-zycore https://github.com/zyantific/zycore-c.git --no-tags
git remote add externals-zydis https://github.com/zyantific/zydis.git --no-tags git remote add externals-zydis https://github.com/zyantific/zydis.git --no-tags
@ -19,15 +19,15 @@ Change `<ref>` to refer to the appropriate git reference.
``` ```
git fetch externals-fmt git fetch externals-fmt
git fetch externals-mcl git fetch externals-mcl
git fetch externals-oaknut
git fetch externals-robin-map git fetch externals-robin-map
git fetch externals-vixl
git fetch externals-xbyak git fetch externals-xbyak
git fetch externals-zycore git fetch externals-zycore
git fetch externals-zydis git fetch externals-zydis
git subtree pull --squash --prefix=externals/fmt externals-fmt <ref> git subtree pull --squash --prefix=externals/fmt externals-fmt <ref>
git subtree pull --squash --prefix=externals/mcl externals-mcl <ref> git subtree pull --squash --prefix=externals/mcl externals-mcl <ref>
git subtree pull --squash --prefix=externals/oaknut externals-oaknut <ref>
git subtree pull --squash --prefix=externals/robin-map externals-robin-map <ref> git subtree pull --squash --prefix=externals/robin-map externals-robin-map <ref>
git subtree pull --squash --prefix=externals/vixl/vixl externals-vixl <ref>
git subtree pull --squash --prefix=externals/xbyak externals-xbyak <ref> git subtree pull --squash --prefix=externals/xbyak externals-xbyak <ref>
git subtree pull --squash --prefix=externals/zycore externals-zycore <ref> git subtree pull --squash --prefix=externals/zycore externals-zycore <ref>
git subtree pull --squash --prefix=externals/zydis externals-zydis <ref> git subtree pull --squash --prefix=externals/zydis externals-zydis <ref>

View file

@ -34,7 +34,7 @@ BraceWrapping:
AfterClass: false AfterClass: false
AfterControlStatement: Never AfterControlStatement: Never
AfterEnum: false AfterEnum: false
AfterFunction: false AfterFunction: true
AfterNamespace: false AfterNamespace: false
AfterObjCDeclaration: false AfterObjCDeclaration: false
AfterStruct: false AfterStruct: false
@ -62,7 +62,7 @@ ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:' CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8 ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4 ContinuationIndentWidth: 4
Cpp11BracedListStyle: true Cpp11BracedListStyle: true
DeriveLineEnding: true DeriveLineEnding: true

View file

@ -1,10 +1,18 @@
cmake_minimum_required(VERSION 3.12 FATAL_ERROR) cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
include(GNUInstallDirs) include(GNUInstallDirs)
project(mcl LANGUAGES CXX VERSION 0.1.8) project(mcl LANGUAGES CXX VERSION 0.1.11)
# Determine if we're built as a subproject (using add_subdirectory)
# or if this is the master project.
set(MASTER_PROJECT OFF)
if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(MASTER_PROJECT ON)
endif()
# Project options # Project options
option(MCL_WARNINGS_AS_ERRORS "Warnings as errors" ON) option(MCL_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT})
option(MCL_INSTALL "Enable installation" ${MASTER_PROJECT})
# Default to a Release build # Default to a Release build
if (NOT CMAKE_BUILD_TYPE) if (NOT CMAKE_BUILD_TYPE)
@ -85,7 +93,7 @@ endif()
# Dependencies # Dependencies
if (NOT TARGET Catch2::Catch2) if (NOT TARGET Catch2::Catch2)
find_package(Catch2 QUIET) find_package(Catch2 3 QUIET)
endif() endif()
if (NOT TARGET fmt::fmt) if (NOT TARGET fmt::fmt)
@ -95,32 +103,33 @@ endif()
# Project files # Project files
add_subdirectory(src) add_subdirectory(src)
if (TARGET Catch2::Catch2) if (TARGET Catch2::Catch2 AND MASTER_PROJECT)
add_subdirectory(tests) add_subdirectory(tests)
endif() endif()
# Install instructions # Install instructions
if (MCL_INSTALL)
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)
include(GNUInstallDirs) install(TARGETS mcl EXPORT mclTargets)
include(CMakePackageConfigHelpers) install(EXPORT mclTargets
install(TARGETS mcl EXPORT mclTargets)
install(EXPORT mclTargets
NAMESPACE merry:: NAMESPACE merry::
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mcl" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mcl"
) )
configure_package_config_file(CMakeModules/mclConfig.cmake.in configure_package_config_file(CMakeModules/mclConfig.cmake.in
mclConfig.cmake mclConfig.cmake
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mcl" INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mcl"
) )
write_basic_package_version_file(mclConfigVersion.cmake write_basic_package_version_file(mclConfigVersion.cmake
COMPATIBILITY SameMajorVersion COMPATIBILITY SameMajorVersion
) )
install(FILES install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/mclConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/mclConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/mclConfigVersion.cmake" "${CMAKE_CURRENT_BINARY_DIR}/mclConfigVersion.cmake"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mcl" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mcl"
) )
install(DIRECTORY include/ TYPE INCLUDE FILES_MATCHING PATTERN "*.hpp") install(DIRECTORY include/ TYPE INCLUDE FILES_MATCHING PATTERN "*.hpp")
endif()

View file

@ -13,11 +13,12 @@
namespace mcl::detail { namespace mcl::detail {
[[noreturn]] void assert_terminate_impl(fmt::string_view msg, fmt::format_args args); [[noreturn]] void assert_terminate_impl(const char* expr_str, fmt::string_view msg, fmt::format_args args);
template<typename... Ts> template<typename... Ts>
[[noreturn]] void assert_terminate(fmt::string_view msg, Ts... args) { [[noreturn]] void assert_terminate(const char* expr_str, fmt::string_view msg, Ts... args)
assert_terminate_impl(msg, fmt::make_format_args(args...)); {
assert_terminate_impl(expr_str, msg, fmt::make_format_args(args...));
} }
} // namespace mcl::detail } // namespace mcl::detail
@ -32,7 +33,7 @@ template<typename... Ts>
} \ } \
} else { \ } else { \
if (!(expr)) [[unlikely]] { \ if (!(expr)) [[unlikely]] { \
::mcl::detail::assert_terminate(#expr); \ ::mcl::detail::assert_terminate(#expr, "(none)"); \
} \ } \
} \ } \
}() }()
@ -45,12 +46,12 @@ template<typename... Ts>
} \ } \
} else { \ } else { \
if (!(expr)) [[unlikely]] { \ if (!(expr)) [[unlikely]] { \
::mcl::detail::assert_terminate(#expr "\nMessage: " __VA_ARGS__); \ ::mcl::detail::assert_terminate(#expr, __VA_ARGS__); \
} \ } \
} \ } \
}() }()
#define ASSERT_FALSE(...) ::mcl::detail::assert_terminate("false\nMessage: " __VA_ARGS__) #define ASSERT_FALSE(...) ::mcl::detail::assert_terminate("false", __VA_ARGS__)
#if defined(NDEBUG) || defined(MCL_IGNORE_ASSERTS) #if defined(NDEBUG) || defined(MCL_IGNORE_ASSERTS)
# define DEBUG_ASSERT(expr) ASSUME(expr) # define DEBUG_ASSERT(expr) ASSUME(expr)

View file

@ -13,12 +13,14 @@
namespace mcl::bit { namespace mcl::bit {
template<BitIntegral T> template<BitIntegral T>
inline size_t count_ones(T x) { inline size_t count_ones(T x)
{
return std::bitset<bitsizeof<T>>(x).count(); return std::bitset<bitsizeof<T>>(x).count();
} }
template<BitIntegral T> template<BitIntegral T>
constexpr size_t count_leading_zeros(T x) { constexpr size_t count_leading_zeros(T x)
{
size_t result = bitsizeof<T>; size_t result = bitsizeof<T>;
while (x != 0) { while (x != 0) {
x >>= 1; x >>= 1;
@ -28,7 +30,8 @@ constexpr size_t count_leading_zeros(T x) {
} }
template<BitIntegral T> template<BitIntegral T>
constexpr int highest_set_bit(T x) { constexpr int highest_set_bit(T x)
{
int result = -1; int result = -1;
while (x != 0) { while (x != 0) {
x >>= 1; x >>= 1;
@ -38,7 +41,8 @@ constexpr int highest_set_bit(T x) {
} }
template<BitIntegral T> template<BitIntegral T>
constexpr size_t lowest_set_bit(T x) { constexpr size_t lowest_set_bit(T x)
{
if (x == 0) { if (x == 0) {
return bitsizeof<T>; return bitsizeof<T>;
} }

View file

@ -13,7 +13,8 @@ namespace mcl::bit {
/// Create a mask with `count` number of one bits. /// Create a mask with `count` number of one bits.
template<size_t count, BitIntegral T> template<size_t count, BitIntegral T>
constexpr T ones() { constexpr T ones()
{
static_assert(count <= bitsizeof<T>, "count larger than bitsize of T"); static_assert(count <= bitsizeof<T>, "count larger than bitsize of T");
if constexpr (count == 0) { if constexpr (count == 0) {
@ -25,7 +26,8 @@ constexpr T ones() {
/// Create a mask with `count` number of one bits. /// Create a mask with `count` number of one bits.
template<BitIntegral T> template<BitIntegral T>
constexpr T ones(size_t count) { constexpr T ones(size_t count)
{
ASSERT_MSG(count <= bitsizeof<T>, "count larger than bitsize of T"); ASSERT_MSG(count <= bitsizeof<T>, "count larger than bitsize of T");
if (count == 0) { if (count == 0) {
@ -36,7 +38,8 @@ constexpr T ones(size_t count) {
/// Create a mask of type T for bits [begin_bit, end_bit] inclusive. /// Create a mask of type T for bits [begin_bit, end_bit] inclusive.
template<size_t begin_bit, size_t end_bit, BitIntegral T> template<size_t begin_bit, size_t end_bit, BitIntegral T>
constexpr T mask() { constexpr T mask()
{
static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)"); static_assert(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
static_assert(begin_bit < bitsizeof<T>, "begin_bit must be smaller than size of T"); static_assert(begin_bit < bitsizeof<T>, "begin_bit must be smaller than size of T");
static_assert(end_bit < bitsizeof<T>, "end_bit must be smaller than size of T"); static_assert(end_bit < bitsizeof<T>, "end_bit must be smaller than size of T");
@ -46,7 +49,8 @@ constexpr T mask() {
/// Create a mask of type T for bits [begin_bit, end_bit] inclusive. /// Create a mask of type T for bits [begin_bit, end_bit] inclusive.
template<BitIntegral T> template<BitIntegral T>
constexpr T mask(size_t begin_bit, size_t end_bit) { constexpr T mask(size_t begin_bit, size_t end_bit)
{
ASSERT_MSG(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)"); ASSERT_MSG(begin_bit <= end_bit, "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
ASSERT_MSG(begin_bit < bitsizeof<T>, "begin_bit must be smaller than size of T"); ASSERT_MSG(begin_bit < bitsizeof<T>, "begin_bit must be smaller than size of T");
ASSERT_MSG(end_bit < bitsizeof<T>, "end_bit must be smaller than size of T"); ASSERT_MSG(end_bit < bitsizeof<T>, "end_bit must be smaller than size of T");
@ -56,91 +60,104 @@ constexpr T mask(size_t begin_bit, size_t end_bit) {
/// Extract bits [begin_bit, end_bit] inclusive from value of type T. /// Extract bits [begin_bit, end_bit] inclusive from value of type T.
template<size_t begin_bit, size_t end_bit, BitIntegral T> template<size_t begin_bit, size_t end_bit, BitIntegral T>
constexpr T get_bits(T value) { constexpr T get_bits(T value)
{
constexpr T m = mask<begin_bit, end_bit, T>(); constexpr T m = mask<begin_bit, end_bit, T>();
return (value & m) >> begin_bit; return (value & m) >> begin_bit;
} }
/// Extract bits [begin_bit, end_bit] inclusive from value of type T. /// Extract bits [begin_bit, end_bit] inclusive from value of type T.
template<BitIntegral T> template<BitIntegral T>
constexpr T get_bits(size_t begin_bit, size_t end_bit, T value) { constexpr T get_bits(size_t begin_bit, size_t end_bit, T value)
{
const T m = mask<T>(begin_bit, end_bit); const T m = mask<T>(begin_bit, end_bit);
return (value & m) >> begin_bit; return (value & m) >> begin_bit;
} }
/// Clears bits [begin_bit, end_bit] inclusive of value of type T. /// Clears bits [begin_bit, end_bit] inclusive of value of type T.
template<size_t begin_bit, size_t end_bit, BitIntegral T> template<size_t begin_bit, size_t end_bit, BitIntegral T>
constexpr T clear_bits(T value) { constexpr T clear_bits(T value)
{
constexpr T m = mask<begin_bit, end_bit, T>(); constexpr T m = mask<begin_bit, end_bit, T>();
return value & ~m; return value & ~m;
} }
/// Clears bits [begin_bit, end_bit] inclusive of value of type T. /// Clears bits [begin_bit, end_bit] inclusive of value of type T.
template<BitIntegral T> template<BitIntegral T>
constexpr T clear_bits(size_t begin_bit, size_t end_bit, T value) { constexpr T clear_bits(size_t begin_bit, size_t end_bit, T value)
{
const T m = mask<T>(begin_bit, end_bit); const T m = mask<T>(begin_bit, end_bit);
return value & ~m; return value & ~m;
} }
/// Modifies bits [begin_bit, end_bit] inclusive of value of type T. /// Modifies bits [begin_bit, end_bit] inclusive of value of type T.
template<size_t begin_bit, size_t end_bit, BitIntegral T> template<size_t begin_bit, size_t end_bit, BitIntegral T>
constexpr T set_bits(T value, T new_bits) { constexpr T set_bits(T value, T new_bits)
{
constexpr T m = mask<begin_bit, end_bit, T>(); constexpr T m = mask<begin_bit, end_bit, T>();
return (value & ~m) | ((new_bits << begin_bit) & m); return (value & ~m) | ((new_bits << begin_bit) & m);
} }
/// Modifies bits [begin_bit, end_bit] inclusive of value of type T. /// Modifies bits [begin_bit, end_bit] inclusive of value of type T.
template<BitIntegral T> template<BitIntegral T>
constexpr T set_bits(size_t begin_bit, size_t end_bit, T value, T new_bits) { constexpr T set_bits(size_t begin_bit, size_t end_bit, T value, T new_bits)
{
const T m = mask<T>(begin_bit, end_bit); const T m = mask<T>(begin_bit, end_bit);
return (value & ~m) | ((new_bits << begin_bit) & m); return (value & ~m) | ((new_bits << begin_bit) & m);
} }
/// Extract bit at bit_position from value of type T. /// Extract bit at bit_position from value of type T.
template<size_t bit_position, BitIntegral T> template<size_t bit_position, BitIntegral T>
constexpr bool get_bit(T value) { constexpr bool get_bit(T value)
{
constexpr T m = mask<bit_position, bit_position, T>(); constexpr T m = mask<bit_position, bit_position, T>();
return (value & m) != 0; return (value & m) != 0;
} }
/// Extract bit at bit_position from value of type T. /// Extract bit at bit_position from value of type T.
template<BitIntegral T> template<BitIntegral T>
constexpr bool get_bit(size_t bit_position, T value) { constexpr bool get_bit(size_t bit_position, T value)
{
const T m = mask<T>(bit_position, bit_position); const T m = mask<T>(bit_position, bit_position);
return (value & m) != 0; return (value & m) != 0;
} }
/// Clears bit at bit_position of value of type T. /// Clears bit at bit_position of value of type T.
template<size_t bit_position, BitIntegral T> template<size_t bit_position, BitIntegral T>
constexpr T clear_bit(T value) { constexpr T clear_bit(T value)
{
constexpr T m = mask<bit_position, bit_position, T>(); constexpr T m = mask<bit_position, bit_position, T>();
return value & ~m; return value & ~m;
} }
/// Clears bit at bit_position of value of type T. /// Clears bit at bit_position of value of type T.
template<BitIntegral T> template<BitIntegral T>
constexpr T clear_bit(size_t bit_position, T value) { constexpr T clear_bit(size_t bit_position, T value)
{
const T m = mask<T>(bit_position, bit_position); const T m = mask<T>(bit_position, bit_position);
return value & ~m; return value & ~m;
} }
/// Modifies bit at bit_position of value of type T. /// Modifies bit at bit_position of value of type T.
template<size_t bit_position, BitIntegral T> template<size_t bit_position, BitIntegral T>
constexpr T set_bit(T value, bool new_bit) { constexpr T set_bit(T value, bool new_bit)
{
constexpr T m = mask<bit_position, bit_position, T>(); constexpr T m = mask<bit_position, bit_position, T>();
return (value & ~m) | (new_bit ? m : static_cast<T>(0)); return (value & ~m) | (new_bit ? m : static_cast<T>(0));
} }
/// Modifies bit at bit_position of value of type T. /// Modifies bit at bit_position of value of type T.
template<BitIntegral T> template<BitIntegral T>
constexpr T set_bit(size_t bit_position, T value, bool new_bit) { constexpr T set_bit(size_t bit_position, T value, bool new_bit)
{
const T m = mask<T>(bit_position, bit_position); const T m = mask<T>(bit_position, bit_position);
return (value & ~m) | (new_bit ? m : static_cast<T>(0)); return (value & ~m) | (new_bit ? m : static_cast<T>(0));
} }
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T. /// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
template<size_t bit_count, BitIntegral T> template<size_t bit_count, BitIntegral T>
constexpr T sign_extend(T value) { constexpr T sign_extend(T value)
{
static_assert(bit_count != 0, "cannot sign-extend zero-sized value"); static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
using S = std::make_signed_t<T>; using S = std::make_signed_t<T>;
@ -150,7 +167,8 @@ constexpr T sign_extend(T value) {
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T. /// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
template<BitIntegral T> template<BitIntegral T>
constexpr T sign_extend(size_t bit_count, T value) { constexpr T sign_extend(size_t bit_count, T value)
{
ASSERT_MSG(bit_count != 0, "cannot sign-extend zero-sized value"); ASSERT_MSG(bit_count != 0, "cannot sign-extend zero-sized value");
using S = std::make_signed_t<T>; using S = std::make_signed_t<T>;
@ -160,7 +178,8 @@ constexpr T sign_extend(size_t bit_count, T value) {
/// Replicate an element across a value of type T. /// Replicate an element across a value of type T.
template<size_t element_size, BitIntegral T> template<size_t element_size, BitIntegral T>
constexpr T replicate_element(T value) { constexpr T replicate_element(T value)
{
static_assert(element_size <= bitsizeof<T>, "element_size is too large"); static_assert(element_size <= bitsizeof<T>, "element_size is too large");
static_assert(bitsizeof<T> % element_size == 0, "bitsize of T not divisible by element_size"); static_assert(bitsizeof<T> % element_size == 0, "bitsize of T not divisible by element_size");
@ -173,7 +192,8 @@ constexpr T replicate_element(T value) {
/// Replicate an element of type U across a value of type T. /// Replicate an element of type U across a value of type T.
template<BitIntegral U, BitIntegral T> template<BitIntegral U, BitIntegral T>
constexpr T replicate_element(T value) { constexpr T replicate_element(T value)
{
static_assert(bitsizeof<U> <= bitsizeof<T>, "element_size is too large"); static_assert(bitsizeof<U> <= bitsizeof<T>, "element_size is too large");
return replicate_element<bitsizeof<U>, T>(value); return replicate_element<bitsizeof<U>, T>(value);
@ -181,7 +201,8 @@ constexpr T replicate_element(T value) {
/// Replicate an element across a value of type T. /// Replicate an element across a value of type T.
template<BitIntegral T> template<BitIntegral T>
constexpr T replicate_element(size_t element_size, T value) { constexpr T replicate_element(size_t element_size, T value)
{
ASSERT_MSG(element_size <= bitsizeof<T>, "element_size is too large"); ASSERT_MSG(element_size <= bitsizeof<T>, "element_size is too large");
ASSERT_MSG(bitsizeof<T> % element_size == 0, "bitsize of T not divisible by element_size"); ASSERT_MSG(bitsizeof<T> % element_size == 0, "bitsize of T not divisible by element_size");
@ -192,7 +213,8 @@ constexpr T replicate_element(size_t element_size, T value) {
} }
template<BitIntegral T> template<BitIntegral T>
constexpr bool most_significant_bit(T value) { constexpr bool most_significant_bit(T value)
{
return get_bit<bitsizeof<T> - 1, T>(value); return get_bit<bitsizeof<T> - 1, T>(value);
} }

View file

@ -11,7 +11,8 @@
namespace mcl::bit { namespace mcl::bit {
template<BitIntegral T> template<BitIntegral T>
constexpr T rotate_right(T x, size_t amount) { constexpr T rotate_right(T x, size_t amount)
{
amount %= bitsizeof<T>; amount %= bitsizeof<T>;
if (amount == 0) { if (amount == 0) {
return x; return x;
@ -20,7 +21,8 @@ constexpr T rotate_right(T x, size_t amount) {
} }
template<BitIntegral T> template<BitIntegral T>
constexpr T rotate_left(T x, size_t amount) { constexpr T rotate_left(T x, size_t amount)
{
amount %= bitsizeof<T>; amount %= bitsizeof<T>;
if (amount == 0) { if (amount == 0) {
return x; return x;

View file

@ -8,18 +8,21 @@
namespace mcl::bit { namespace mcl::bit {
constexpr u16 swap_bytes_16(u16 value) { constexpr u16 swap_bytes_16(u16 value)
{
return static_cast<u16>(u32{value} >> 8 | u32{value} << 8); return static_cast<u16>(u32{value} >> 8 | u32{value} << 8);
} }
constexpr u32 swap_bytes_32(u32 value) { constexpr u32 swap_bytes_32(u32 value)
{
return ((value & 0xff000000u) >> 24) return ((value & 0xff000000u) >> 24)
| ((value & 0x00ff0000u) >> 8) | ((value & 0x00ff0000u) >> 8)
| ((value & 0x0000ff00u) << 8) | ((value & 0x0000ff00u) << 8)
| ((value & 0x000000ffu) << 24); | ((value & 0x000000ffu) << 24);
} }
constexpr u64 swap_bytes_64(u64 value) { constexpr u64 swap_bytes_64(u64 value)
{
return ((value & 0xff00000000000000ull) >> 56) return ((value & 0xff00000000000000ull) >> 56)
| ((value & 0x00ff000000000000ull) >> 40) | ((value & 0x00ff000000000000ull) >> 40)
| ((value & 0x0000ff0000000000ull) >> 24) | ((value & 0x0000ff0000000000ull) >> 24)
@ -30,19 +33,22 @@ constexpr u64 swap_bytes_64(u64 value) {
| ((value & 0x00000000000000ffull) << 56); | ((value & 0x00000000000000ffull) << 56);
} }
constexpr u32 swap_halves_32(u32 value) { constexpr u32 swap_halves_32(u32 value)
{
return ((value & 0xffff0000u) >> 16) return ((value & 0xffff0000u) >> 16)
| ((value & 0x0000ffffu) << 16); | ((value & 0x0000ffffu) << 16);
} }
constexpr u64 swap_halves_64(u64 value) { constexpr u64 swap_halves_64(u64 value)
{
return ((value & 0xffff000000000000ull) >> 48) return ((value & 0xffff000000000000ull) >> 48)
| ((value & 0x0000ffff00000000ull) >> 16) | ((value & 0x0000ffff00000000ull) >> 16)
| ((value & 0x00000000ffff0000ull) << 16) | ((value & 0x00000000ffff0000ull) << 16)
| ((value & 0x000000000000ffffull) << 48); | ((value & 0x000000000000ffffull) << 48);
} }
constexpr u64 swap_words_64(u64 value) { constexpr u64 swap_words_64(u64 value)
{
return ((value & 0xffffffff00000000ull) >> 32) return ((value & 0xffffffff00000000ull) >> 32)
| ((value & 0x00000000ffffffffull) << 32); | ((value & 0x00000000ffffffffull) << 32);
} }

View file

@ -11,7 +11,8 @@ namespace mcl {
/// Reinterpret objects of one type as another by bit-casting between object representations. /// Reinterpret objects of one type as another by bit-casting between object representations.
template<class Dest, class Source> template<class Dest, class Source>
inline Dest bit_cast(const Source& source) noexcept { inline Dest bit_cast(const Source& source) noexcept
{
static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal"); static_assert(sizeof(Dest) == sizeof(Source), "size of destination and source objects must be equal");
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable."); static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");
static_assert(std::is_trivially_copyable_v<Source>, "source type must be trivially copyable"); static_assert(std::is_trivially_copyable_v<Source>, "source type must be trivially copyable");
@ -24,7 +25,8 @@ inline Dest bit_cast(const Source& source) noexcept {
/// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations. /// Reinterpret objects of any arbitrary type as another type by bit-casting between object representations.
/// Note that here we do not verify if source pointed to by source_ptr has enough bytes to read from. /// Note that here we do not verify if source pointed to by source_ptr has enough bytes to read from.
template<class Dest, class SourcePtr> template<class Dest, class SourcePtr>
inline Dest bit_cast_pointee(const SourcePtr source_ptr) noexcept { inline Dest bit_cast_pointee(const SourcePtr source_ptr) noexcept
{
static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer"); static_assert(sizeof(SourcePtr) == sizeof(void*), "source pointer must have size of a pointer");
static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable."); static_assert(std::is_trivially_copyable_v<Dest>, "destination type must be trivially copyable.");

View file

@ -0,0 +1,35 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
#pragma once
#include "mcl/bitsizeof.hpp"
#include "mcl/stdint.hpp"
namespace mcl::detail {
/// Per-slot control byte.
/// If the MSB is 0, this is a full slot and the remaining 7 bits are a partial hash of the key
/// (see meta_byte_from_hash).
/// If the MSB is 1, this is a non-full slot holding one of the named values below.
/// All three named values have the MSB set, so is_full() returns false for each of them.
enum class meta_byte : u8 {
// All eight bits set.
empty = 0xff,
// Only the MSB set. NOTE(review): presumably marks an erased slot - confirm against the container code.
tombstone = 0x80,
// MSB and bit 3 set. NOTE(review): presumably marks the end of the meta byte array - confirm against the container code.
end_sentinel = 0x88,
};
/// Returns true when `mb` describes a full slot, i.e. when its most significant bit is clear.
inline bool is_full(meta_byte mb)
{
    const auto raw = static_cast<u8>(mb);
    const bool msb_set = (raw & 0x80) != 0;
    return !msb_set;
}
/// Builds the meta byte of a full slot from the 7 most significant bits of `hash`.
/// The shifted value is at most 0x7f, so the MSB of the resulting meta byte is always clear.
inline meta_byte meta_byte_from_hash(size_t hash)
{
    // Everything except the top 7 bits of the hash is shifted out.
    const size_t partial_hash = hash >> (bitsizeof<size_t> - 7);
    return static_cast<meta_byte>(partial_hash);
}
/// Selects a group index by keeping only the bits of `hash` that are set in `group_index_mask`.
inline size_t group_index_from_hash(size_t hash, size_t group_index_mask)
{
    const size_t group_index = group_index_mask & hash;
    return group_index;
}
} // namespace mcl::detail

View file

@ -0,0 +1,263 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
#pragma once
#include <array>
#include <bit>
#include "mcl/assert.hpp"
#include "mcl/container/detail/meta_byte.hpp"
#include "mcl/macro/architecture.hpp"
#include "mcl/stdint.hpp"
#if defined(MCL_ARCHITECTURE_ARM64)
# include <arm_neon.h>
#elif defined(MCL_ARCHITECTURE_X86_64)
# include <emmintrin.h>
# include "mcl/bit_cast.hpp"
#else
# include <cstring>
#endif
namespace mcl::detail {
#if defined(MCL_ARCHITECTURE_ARM64)
struct meta_byte_group {
static constexpr size_t max_group_size{16};
explicit meta_byte_group(meta_byte* ptr)
: data{vld1q_u8(reinterpret_cast<u8*>(ptr))}
{}
explicit meta_byte_group(const std::array<meta_byte, 16>& array)
: data{vld1q_u8(reinterpret_cast<const u8*>(array.data()))}
{}
uint64x2_t match(meta_byte cmp) const
{
return vreinterpretq_u64_u8(vandq_u8(vceqq_u8(data,
vdupq_n_u8(static_cast<u8>(cmp))),
vdupq_n_u8(0x80)));
}
uint64x2_t match_empty_or_tombstone() const
{
return vreinterpretq_u64_u8(vandq_u8(data,
vdupq_n_u8(0x80)));
}
bool is_any_empty() const
{
static_assert(meta_byte::empty == static_cast<meta_byte>(0xff), "empty must be maximal u8 value");
return vmaxvq_u8(data) == 0xff;
}
bool is_all_empty_or_tombstone() const
{
return vminvq_u8(vandq_u8(data, vdupq_n_u8(0x80))) == 0x80;
}
meta_byte get(size_t index) const
{
return static_cast<meta_byte>(data[index]);
}
void set(size_t index, meta_byte value)
{
data[index] = static_cast<u8>(value);
}
uint8x16_t data;
};
// ARM64 variant: runs a block of statements once for every bit set in a match result.
// MATCH is evaluated once and stored in `match_result` (a uint64x2_t).
// The statements passed as __VA_ARGS__ are executed for each set bit of lane 0 and then of lane 1.
// Inside them `match_index` is the bit position divided by 8, plus 8 for lane 1.
// `match_result_v &= match_result_v - 1` clears the lowest set bit, which advances the loop.
# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \
{ \
const uint64x2_t match_result{MATCH}; \
\
for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \
const size_t match_index{static_cast<size_t>(std::countr_zero(match_result_v) / 8)}; \
__VA_ARGS__ \
} \
\
for (u64 match_result_v{match_result[1]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \
const size_t match_index{static_cast<size_t>(8 + std::countr_zero(match_result_v) / 8)}; \
__VA_ARGS__ \
} \
}
// ARM64 variant: same iteration as MCL_HMAP_MATCH_META_BYTE_GROUP, but the last byte is ignored.
// Lane 1 is masked with 0x00ffffffffffffff before iterating, which clears its most significant byte.
// As a result the statements in __VA_ARGS__ never run with `match_index` equal to 15.
# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \
{ \
const uint64x2_t match_result{MATCH}; \
\
for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \
const size_t match_index{static_cast<size_t>(std::countr_zero(match_result_v) / 8)}; \
__VA_ARGS__ \
} \
\
for (u64 match_result_v{match_result[1] & 0x00ffffffffffffff}; match_result_v != 0; match_result_v &= match_result_v - 1) { \
const size_t match_index{static_cast<size_t>(8 + std::countr_zero(match_result_v) / 8)}; \
__VA_ARGS__ \
} \
}
#elif defined(MCL_ARCHITECTURE_X86_64)
struct meta_byte_group {
static constexpr size_t max_group_size{16};
explicit meta_byte_group(meta_byte* ptr)
: data{_mm_load_si128(reinterpret_cast<__m128i const*>(ptr))}
{}
explicit meta_byte_group(const std::array<meta_byte, 16>& array)
: data{_mm_loadu_si128(reinterpret_cast<__m128i const*>(array.data()))}
{}
u16 match(meta_byte cmp) const
{
return _mm_movemask_epi8(_mm_cmpeq_epi8(data, _mm_set1_epi8(static_cast<u8>(cmp))));
}
u16 match_empty_or_tombstone() const
{
return _mm_movemask_epi8(data);
}
bool is_any_empty() const
{
return match(meta_byte::empty);
}
bool is_all_empty_or_tombstone() const
{
return match_empty_or_tombstone() == 0xffff;
}
meta_byte get(size_t index) const
{
return mcl::bit_cast<std::array<meta_byte, max_group_size>>(data)[index];
}
void set(size_t index, meta_byte value)
{
auto array = mcl::bit_cast<std::array<meta_byte, max_group_size>>(data);
array[index] = value;
data = mcl::bit_cast<__m128i>(array);
}
__m128i data;
};
// x86-64 variant: runs a block of statements once for every bit set in a 16-bit match mask.
// MATCH is evaluated once and stored in `match_result` (a u16).
// The statements passed as __VA_ARGS__ are executed with `match_index` set to the position of the
// lowest set bit; `match_result &= match_result - 1` then clears that bit.
# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \
{ \
for (u16 match_result{MATCH}; match_result != 0; match_result &= match_result - 1) { \
const size_t match_index{static_cast<size_t>(std::countr_zero(match_result))}; \
__VA_ARGS__ \
} \
}
// Same as MCL_HMAP_MATCH_META_BYTE_GROUP, but bit 15 of MATCH is masked off (& 0x7fff)
// first, so the last meta byte of the group is never reported.
//
// On each iteration __VA_ARGS__ is executed with `match_index` (0-14) in scope.
# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \
{ \
for (u16 match_result{static_cast<u16>((MATCH) & (0x7fff))}; match_result != 0; match_result &= match_result - 1) { \
const size_t match_index{static_cast<size_t>(std::countr_zero(match_result))}; \
__VA_ARGS__ \
} \
}
#else
struct meta_byte_group {
static constexpr size_t max_group_size{16};
static constexpr u64 msb{0x8080808080808080};
static constexpr u64 lsb{0x0101010101010101};
static constexpr u64 not_msb{0x7f7f7f7f7f7f7f7f};
static constexpr u64 not_lsb{0xfefefefefefefefe};
explicit meta_byte_group(meta_byte* ptr)
{
std::memcpy(data.data(), ptr, sizeof(data));
}
explicit meta_byte_group(const std::array<meta_byte, 16>& array)
: data{array}
{}
std::array<u64, 2> match(meta_byte cmp) const
{
DEBUG_ASSERT(is_full(cmp));
const u64 vcmp{lsb * static_cast<u64>(cmp)};
return {(msb - ((data[0] ^ vcmp) & not_msb)) & ~data[0] & msb, (msb - ((data[1] ^ vcmp) & not_msb)) & ~data[1] & msb};
}
std::array<u64, 2> match_empty_or_tombstone() const
{
return {data[0] & msb, data[1] & msb};
}
bool is_any_empty() const
{
static_assert((static_cast<u8>(meta_byte::empty) & 0xc0) == 0xc0);
static_assert((static_cast<u8>(meta_byte::tombstone) & 0xc0) == 0x80);
return (data[0] & (data[0] << 1) & msb) || (data[1] & (data[1] << 1) & msb);
}
bool is_all_empty_or_tombstone() const
{
return (data[0] & data[1] & msb) == msb;
}
meta_byte get(size_t index) const
{
return mcl::bit_cast<std::array<meta_byte, max_group_size>>(data)[index];
}
void set(size_t index, meta_byte value)
{
auto array = mcl::bit_cast<std::array<meta_byte, max_group_size>>(data);
array[index] = value;
data = mcl::bit_cast<std::array<u64, 2>>(array);
}
std::array<u64, 2> data;
};
// Runs __VA_ARGS__ once per set bit of the two u64 masks in MATCH.
//
// MATCH is evaluated once into a std::array<u64, 2>. Each word is walked by repeatedly
// clearing its lowest set bit (`v &= v - 1`). For every set bit, __VA_ARGS__ is executed
// with `match_index` in scope: the bit position divided by 8 for the first word, and
// 8 plus that for the second word. __VA_ARGS__ may `return` from the enclosing function
// to stop early.
# define MCL_HMAP_MATCH_META_BYTE_GROUP(MATCH, ...) \
{ \
const std::array<u64, 2> match_result{MATCH}; \
\
for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \
const size_t match_index{static_cast<size_t>(std::countr_zero(match_result_v) / 8)}; \
__VA_ARGS__ \
} \
\
for (u64 match_result_v{match_result[1]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \
const size_t match_index{static_cast<size_t>(8 + std::countr_zero(match_result_v) / 8)}; \
__VA_ARGS__ \
} \
}
// Same as MCL_HMAP_MATCH_META_BYTE_GROUP, but the most significant byte of the second
// word is masked off (& 0x00ffffffffffffff), so byte 15 of the group is never reported.
//
// On each iteration __VA_ARGS__ is executed with `match_index` (0-14) in scope.
# define MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(MATCH, ...) \
{ \
const std::array<u64, 2> match_result{MATCH}; \
\
for (u64 match_result_v{match_result[0]}; match_result_v != 0; match_result_v &= match_result_v - 1) { \
const size_t match_index{static_cast<size_t>(std::countr_zero(match_result_v) / 8)}; \
__VA_ARGS__ \
} \
\
for (u64 match_result_v{match_result[1] & 0x00ffffffffffffff}; match_result_v != 0; match_result_v &= match_result_v - 1) { \
const size_t match_index{static_cast<size_t>(8 + std::countr_zero(match_result_v) / 8)}; \
__VA_ARGS__ \
} \
}
#endif
} // namespace mcl::detail

View file

@ -0,0 +1,16 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
#pragma once
namespace mcl::detail {
// Storage for one ValueType whose lifetime is managed by the code that owns the slot.
//
// Both the constructor and the destructor are user-provided and empty, so creating or
// destroying a slot_union neither constructs nor destroys `value`. The owner has to
// placement-new an object into `value` before using it and call that object's destructor
// explicitly when it is done with it.
template<typename ValueType>
union slot_union {
slot_union() {}
~slot_union() {}
ValueType value;
};
} // namespace mcl::detail

View file

@ -0,0 +1,532 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <functional>
#include <limits>
#include <type_traits>
#include <utility>
#include "mcl/assert.hpp"
#include "mcl/container/detail/meta_byte.hpp"
#include "mcl/container/detail/meta_byte_group.hpp"
#include "mcl/container/detail/slot_union.hpp"
#include "mcl/hash/xmrx.hpp"
#include "mcl/hint/assume.hpp"
#include "mcl/memory/overaligned_unique_ptr.hpp"
namespace mcl {
template<typename KeyType, typename MappedType, typename Hash, typename Pred>
class hmap;
/// Forward iterator over the occupied slots of an hmap.
///
/// The iterator advances a meta-byte pointer and a slot pointer in lock-step. Slots whose
/// meta byte is `empty` or `tombstone` are skipped, and reaching the `end_sentinel` meta
/// byte turns the iterator into the end iterator, which is represented by two null
/// pointers (the default-constructed state).
template<bool IsConst, typename KeyType, typename MappedType, typename Hash, typename Pred>
class hmap_iterator {
    using base_value_type = std::pair<const KeyType, MappedType>;
    using slot_type = detail::slot_union<base_value_type>;

public:
    using key_type = KeyType;
    using mapped_type = MappedType;
    using iterator_category = std::forward_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = std::conditional_t<IsConst, std::add_const_t<base_value_type>, base_value_type>;
    using pointer = value_type*;
    using const_pointer = const value_type*;
    using reference = value_type&;
    using const_reference = const value_type&;

    /// Constructs the end iterator.
    hmap_iterator() = default;
    hmap_iterator(const hmap_iterator& other) = default;
    hmap_iterator& operator=(const hmap_iterator& other) = default;

    /// Moves to the next occupied slot; incrementing the end iterator does nothing.
    hmap_iterator& operator++()
    {
        if (mb_ptr != nullptr) {
            ++mb_ptr;
            ++slot_ptr;
            skip_empty_or_tombstone();
        }
        return *this;
    }

    hmap_iterator operator++(int)
    {
        hmap_iterator previous(*this);
        operator++();
        return previous;
    }

    /// Two iterators are equal when both of their pointers are equal.
    bool operator==(const hmap_iterator& other) const
    {
        return mb_ptr == other.mb_ptr && slot_ptr == other.slot_ptr;
    }

    bool operator!=(const hmap_iterator& other) const
    {
        return !(*this == other);
    }

    reference operator*() const
    {
        return static_cast<reference>(slot_ptr->value);
    }

    pointer operator->() const
    {
        return std::addressof(operator*());
    }

private:
    friend class hmap<KeyType, MappedType, Hash, Pred>;

    /// Points the iterator at a specific slot; neither pointer may be null.
    hmap_iterator(detail::meta_byte* mb_ptr, slot_type* slot_ptr)
        : mb_ptr{mb_ptr}, slot_ptr{slot_ptr}
    {
        ASSUME(mb_ptr != nullptr);
        ASSUME(slot_ptr != nullptr);
    }

    /// Advances past `empty`/`tombstone` meta bytes starting at the current position,
    /// and becomes the end iterator if that lands on the `end_sentinel`.
    void skip_empty_or_tombstone()
    {
        if (mb_ptr == nullptr) {
            return;
        }

        for (;; ++mb_ptr, ++slot_ptr) {
            const bool skippable{*mb_ptr == detail::meta_byte::empty || *mb_ptr == detail::meta_byte::tombstone};
            if (!skippable) {
                break;
            }
        }

        if (*mb_ptr == detail::meta_byte::end_sentinel) {
            mb_ptr = nullptr;
            slot_ptr = nullptr;
        }
    }

    detail::meta_byte* mb_ptr{nullptr};
    slot_type* slot_ptr{nullptr};
};
template<typename KeyType, typename MappedType, typename Hash = hash::avalanche_xmrx<KeyType>, typename Pred = std::equal_to<KeyType>>
class hmap {
public:
using key_type = KeyType;
using mapped_type = MappedType;
using hasher = Hash;
using key_equal = Pred;
using value_type = std::pair<const key_type, mapped_type>;
using reference = value_type&;
using const_reference = const value_type&;
using pointer = value_type*;
using const_pointer = const value_type*;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
using iterator = hmap_iterator<false, key_type, mapped_type, hasher, key_equal>;
using const_iterator = hmap_iterator<true, key_type, mapped_type, hasher, key_equal>;
private:
static constexpr size_t group_size{detail::meta_byte_group::max_group_size};
static constexpr size_t average_max_group_load{group_size - 2};
using slot_type = detail::slot_union<value_type>;
using slot_ptr = std::unique_ptr<slot_type[]>;
using meta_byte_ptr = overaligned_unique_ptr<group_size, detail::meta_byte[]>;
static_assert(!std::is_reference_v<key_type>);
static_assert(!std::is_reference_v<mapped_type>);
public:
hmap()
{
initialize_members(1);
}
hmap(const hmap& other)
{
deep_copy(other);
}
hmap(hmap&& other)
: group_index_mask{std::exchange(other.group_index_mask, 0)}
, empty_slots{std::exchange(other.empty_slots, 0)}
, full_slots{std::exchange(other.full_slots, 0)}
, mbs{std::move(other.mbs)}
, slots{std::move(other.slots)}
{
}
hmap& operator=(const hmap& other)
{
deep_copy(other);
return *this;
}
hmap& operator=(hmap&& other)
{
group_index_mask = std::exchange(other.group_index_mask, 0);
empty_slots = std::exchange(other.empty_slots, 0);
full_slots = std::exchange(other.full_slots, 0);
mbs = std::move(other.mbs);
slots = std::move(other.slots);
return *this;
}
~hmap()
{
if (!mbs)
return;
clear();
}
[[nodiscard]] bool empty() const noexcept { return full_slots == 0; }
size_type size() const noexcept { return full_slots; }
size_type max_size() const noexcept { return static_cast<size_type>(std::numeric_limits<difference_type>::max()); }
iterator begin()
{
iterator result{iterator_at(0)};
result.skip_empty_or_tombstone();
return result;
}
iterator end()
{
return {};
}
const_iterator cbegin() const
{
const_iterator result{const_iterator_at(0)};
result.skip_empty_or_tombstone();
return result;
}
const_iterator cend() const
{
return {};
}
const_iterator begin() const
{
return cbegin();
}
const_iterator end() const
{
return cend();
}
template<typename K = key_type, typename... Args>
std::pair<iterator, bool> try_emplace(K&& k, Args&&... args)
{
auto [item_index, item_found] = find_key_or_empty_slot(k);
if (!item_found) {
new (&slots[item_index].value) value_type(
std::piecewise_construct,
std::forward_as_tuple(std::forward<K>(k)),
std::forward_as_tuple(std::forward<Args>(args)...));
}
return {iterator_at(item_index), !item_found};
}
template<typename K = key_type, typename V = mapped_type>
std::pair<iterator, bool> insert_or_assign(K&& k, V&& v)
{
auto [item_index, item_found] = find_key_or_empty_slot(k);
if (item_found) {
slots[item_index].value.second = std::forward<V>(v);
} else {
new (&slots[item_index].value) value_type(
std::forward<K>(k),
std::forward<V>(v));
}
return {iterator_at(item_index), !item_found};
}
void erase(const_iterator position)
{
if (position == cend()) {
return;
}
const std::size_t item_index{static_cast<std::size_t>(std::distance(mbs.get(), position.mb_ptr))};
const std::size_t group_index{item_index / group_size};
const detail::meta_byte_group g{mbs.get() + group_index * group_size};
erase_impl(item_index, std::move(g));
}
void erase(iterator position)
{
if (position == end()) {
return;
}
const std::size_t item_index{static_cast<std::size_t>(std::distance(mbs.get(), position.mb_ptr))};
const std::size_t group_index{item_index / group_size};
const detail::meta_byte_group g{mbs.get() + group_index * group_size};
erase_impl(item_index, std::move(g));
}
template<typename K = key_type>
size_t erase(const K& key)
{
const std::size_t hash{hasher{}(key)};
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
while (true) {
detail::meta_byte_group g{mbs.get() + group_index * group_size};
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match(mb), {
const std::size_t item_index{group_index * group_size + match_index};
if (key_equal{}(slots[item_index].value.first, key)) [[likely]] {
erase_impl(item_index, std::move(g));
return 1;
}
});
if (g.is_any_empty()) [[likely]] {
return 0;
}
group_index = (group_index + 1) & group_index_mask;
}
}
template<typename K = key_type>
iterator find(const K& key)
{
const std::size_t hash{hasher{}(key)};
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
while (true) {
detail::meta_byte_group g{mbs.get() + group_index * group_size};
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match(mb), {
const std::size_t item_index{group_index * group_size + match_index};
if (key_equal{}(slots[item_index].value.first, key)) [[likely]] {
return iterator_at(item_index);
}
});
if (g.is_any_empty()) [[likely]] {
return {};
}
group_index = (group_index + 1) & group_index_mask;
}
}
template<typename K = key_type>
const_iterator find(const K& key) const
{
const std::size_t hash{hasher{}(key)};
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
while (true) {
detail::meta_byte_group g{mbs.get() + group_index * group_size};
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match(mb), {
const std::size_t item_index{group_index * group_size + match_index};
if (key_equal{}(slots[item_index].value.first, key)) [[likely]] {
return const_iterator_at(item_index);
}
});
if (g.is_any_empty()) [[likely]] {
return {};
}
group_index = (group_index + 1) & group_index_mask;
}
}
template<typename K = key_type>
bool contains(const K& key) const
{
return find(key) != end();
}
template<typename K = key_type>
size_t count(const K& key) const
{
return contains(key) ? 1 : 0;
}
template<typename K = key_type>
mapped_type& operator[](K&& k)
{
return try_emplace(std::forward<K>(k)).first->second;
}
template<typename K = key_type>
mapped_type& at(K&& k)
{
const auto iter{find(k)};
if (iter == end()) {
throw std::out_of_range("hmap::at: key not found");
}
return iter->second;
}
template<typename K = key_type>
const mapped_type& at(K&& k) const
{
const auto iter{find(k)};
if (iter == end()) {
throw std::out_of_range("hmap::at: key not found");
}
return iter->second;
}
void clear()
{
for (auto iter{begin()}; iter != end(); ++iter) {
iter->~value_type();
}
clear_metadata();
}
private:
iterator iterator_at(std::size_t item_index)
{
return {mbs.get() + item_index, slots.get() + item_index};
}
const_iterator const_iterator_at(std::size_t item_index) const
{
return {mbs.get() + item_index, slots.get() + item_index};
}
std::pair<std::size_t, bool> find_key_or_empty_slot(const key_type& key)
{
const std::size_t hash{hasher{}(key)};
const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
while (true) {
detail::meta_byte_group g{mbs.get() + group_index * group_size};
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match(mb), {
const std::size_t item_index{group_index * group_size + match_index};
if (key_equal{}(slots[item_index].value.first, key)) [[likely]] {
return {item_index, true};
}
});
if (g.is_any_empty()) [[likely]] {
return {find_empty_slot_to_insert(hash), false};
}
group_index = (group_index + 1) & group_index_mask;
}
}
std::size_t find_empty_slot_to_insert(const std::size_t hash)
{
if (empty_slots == 0) [[unlikely]] {
grow_and_rehash();
}
std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};
while (true) {
detail::meta_byte_group g{mbs.get() + group_index * group_size};
MCL_HMAP_MATCH_META_BYTE_GROUP(g.match_empty_or_tombstone(), {
const std::size_t item_index{group_index * group_size + match_index};
if (mbs[item_index] == detail::meta_byte::empty) [[likely]] {
--empty_slots;
}
++full_slots;
mbs[item_index] = detail::meta_byte_from_hash(hash);
return item_index;
});
group_index = (group_index + 1) & group_index_mask;
}
}
void erase_impl(std::size_t item_index, detail::meta_byte_group&& g)
{
slots[item_index].value->~value_type();
--full_slots;
if (g.is_any_empty()) {
mbs[item_index] = detail::meta_byte::empty;
++empty_slots;
} else {
mbs[item_index] = detail::meta_byte::tombstone;
}
}
void grow_and_rehash()
{
const std::size_t new_group_count{2 * (group_index_mask + 1)};
pow2_resize(new_group_count);
}
void pow2_resize(std::size_t new_group_count)
{
auto iter{begin()};
const auto old_mbs{std::move(mbs)};
const auto old_slots{std::move(slots)};
initialize_members(new_group_count);
for (; iter != end(); ++iter) {
const std::size_t hash{hasher{}(iter->first)};
const std::size_t item_index{find_empty_slot_to_insert(hash)};
new (&slots[item_index].value) value_type(std::move(iter.slot_ptr->value));
iter.slot_ptr->value.~value_type();
}
}
void deep_copy(const hmap& other)
{
initialize_members(other.group_index_mask + 1);
for (auto iter = other.begin(); iter != other.end(); ++iter) {
const std::size_t hash{hasher{}(iter->first)};
const std::size_t item_index{find_empty_slot_to_insert(hash)};
new (&slots[item_index].value) value_type(iter.slot_ptr->value);
}
}
void initialize_members(std::size_t group_count)
{
// DEBUG_ASSERT(group_count != 0 && std::ispow2(group_count));
group_index_mask = group_count - 1;
mbs = make_overaligned_unique_ptr_array<group_size, detail::meta_byte>(group_count * group_size + 1);
slots = slot_ptr{new slot_type[group_count * group_size]};
clear_metadata();
}
void clear_metadata()
{
const std::size_t group_count{group_index_mask + 1};
empty_slots = group_count * average_max_group_load;
full_slots = 0;
std::memset(mbs.get(), static_cast<int>(detail::meta_byte::empty), group_count * group_size);
mbs[group_count * group_size] = detail::meta_byte::end_sentinel;
}
std::size_t group_index_mask;
std::size_t empty_slots;
std::size_t full_slots;
meta_byte_ptr mbs;
slot_ptr slots;
};
} // namespace mcl

View file

@ -0,0 +1,549 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
#pragma once
#include <array>
#include <cstddef>
#include <functional>
#include <limits>
#include <type_traits>
#include <utility>
#include "mcl/assert.hpp"
#include "mcl/container/detail/meta_byte.hpp"
#include "mcl/container/detail/meta_byte_group.hpp"
#include "mcl/container/detail/slot_union.hpp"
#include "mcl/hash/xmrx.hpp"
#include "mcl/hint/assume.hpp"
namespace mcl {
template<typename KeyType, typename MappedType, typename Hash, typename Pred>
class ihmap;
namespace detail {
// Initial meta bytes of an ihmap group: fifteen `empty` bytes followed by a `tombstone`.
// An ihmap group has one slot fewer than it has meta bytes (see ihmap_group::group_size),
// so the last byte does not describe a slot. Being a tombstone rather than `empty`, it
// does not make meta_byte_group::is_any_empty() report an empty slot.
constexpr std::array<meta_byte, 16> ihmap_default_meta{
meta_byte::empty, meta_byte::empty, meta_byte::empty, meta_byte::empty,
meta_byte::empty, meta_byte::empty, meta_byte::empty, meta_byte::empty,
meta_byte::empty, meta_byte::empty, meta_byte::empty, meta_byte::empty,
meta_byte::empty, meta_byte::empty, meta_byte::empty, meta_byte::tombstone};
// One group of an ihmap: the meta bytes and the slots they describe, stored together.
template<typename KeyType, typename MappedType>
struct ihmap_group {
using base_value_type = std::pair<const KeyType, MappedType>;
using slot_type = detail::slot_union<base_value_type>;
// Number of slots in a group: one fewer than the number of meta bytes.
static constexpr std::size_t group_size{meta_byte_group::max_group_size - 1};
// Meta bytes for the slots; a new group starts out with ihmap_default_meta.
meta_byte_group meta{ihmap_default_meta};
// Raw storage for the elements; slot_union does not construct or destroy its value.
std::array<slot_type, group_size> slots{};
};
} // namespace detail
/// Forward iterator over the occupied slots of an ihmap.
///
/// The position is a pointer to the current group plus a slot index within it. Meta byte
/// number `group_size` of a group does not describe a slot: reaching it moves on to the
/// next group, or, if it holds `end_sentinel`, turns the iterator into the end iterator
/// (null group pointer, slot index 0 - the default-constructed state).
template<bool IsConst, typename KeyType, typename MappedType, typename Hash, typename Pred>
class ihmap_iterator {
    using group_type = detail::ihmap_group<KeyType, MappedType>;
    using base_value_type = typename group_type::base_value_type;

public:
    using key_type = KeyType;
    using mapped_type = MappedType;
    using iterator_category = std::forward_iterator_tag;
    using difference_type = std::ptrdiff_t;
    using value_type = std::conditional_t<IsConst, std::add_const_t<base_value_type>, base_value_type>;
    using pointer = value_type*;
    using const_pointer = const value_type*;
    using reference = value_type&;
    using const_reference = const value_type&;

    /// Constructs the end iterator.
    ihmap_iterator() = default;
    ihmap_iterator(const ihmap_iterator& other) = default;
    ihmap_iterator& operator=(const ihmap_iterator& other) = default;

    /// Moves to the next occupied slot; incrementing the end iterator does nothing.
    ihmap_iterator& operator++()
    {
        if (group_ptr != nullptr) {
            ++slot_index;
            skip_empty_or_tombstone();
        }
        return *this;
    }

    ihmap_iterator operator++(int)
    {
        ihmap_iterator previous(*this);
        operator++();
        return previous;
    }

    /// Two iterators are equal when they refer to the same group and slot index.
    bool operator==(const ihmap_iterator& other) const
    {
        return group_ptr == other.group_ptr && slot_index == other.slot_index;
    }

    bool operator!=(const ihmap_iterator& other) const
    {
        return !(*this == other);
    }

    reference operator*() const
    {
        return static_cast<reference>(group_ptr->slots[slot_index].value);
    }

    pointer operator->() const
    {
        return std::addressof(operator*());
    }

private:
    friend class ihmap<KeyType, MappedType, Hash, Pred>;

    /// Points the iterator at slot `slot_index` of `group_ptr`, which may not be null.
    ihmap_iterator(group_type* group_ptr, size_t slot_index)
        : group_ptr{group_ptr}, slot_index{slot_index}
    {
        ASSUME(group_ptr != nullptr);
    }

    /// Advances from the current position to the first slot whose meta byte is full,
    /// crossing into following groups as needed, and becomes the end iterator when the
    /// `end_sentinel` is reached.
    void skip_empty_or_tombstone()
    {
        if (group_ptr == nullptr) {
            return;
        }

        for (;;) {
            const detail::meta_byte mb = group_ptr->meta.get(slot_index);

            if (slot_index != group_type::group_size) {
                // Ordinary slot: stop here if it is occupied, otherwise try the next one.
                if (is_full(mb)) {
                    return;
                }
                ++slot_index;
                continue;
            }

            // Past the last slot of this group: `mb` is the group's trailing meta byte.
            slot_index = 0;
            ++group_ptr;
            if (mb == detail::meta_byte::end_sentinel) {
                group_ptr = nullptr;
                return;
            }
        }
    }

    group_type* group_ptr{nullptr};
    std::size_t slot_index{0};
};
/// Hash map from KeyType to MappedType using open addressing, with the meta bytes of each
/// group stored inline next to that group's slots.
///
/// Storage is a single array of groups. Every group carries 16 meta bytes and 15 slots;
/// the last meta byte describes no slot and holds `end_sentinel` in the final group. A
/// lookup hashes the key, derives a starting group and a meta byte from the hash, and
/// probes groups linearly (wrapping around) until it either finds the key or reaches a
/// group that contains an empty slot.
///
/// Erasing an element from a group without any empty slot leaves a tombstone behind, so
/// that probes passing through that group keep going.
///
/// A moved-from ihmap owns no storage; the only operations this class handles for such
/// an object are destruction and assignment.
template<typename KeyType, typename MappedType, typename Hash = hash::avalanche_xmrx<KeyType>, typename Pred = std::equal_to<KeyType>>
class ihmap {
    using group_type = detail::ihmap_group<KeyType, MappedType>;

public:
    using key_type = KeyType;
    using mapped_type = MappedType;
    using hasher = Hash;
    using key_equal = Pred;
    using value_type = typename group_type::base_value_type;
    using reference = value_type&;
    using const_reference = const value_type&;
    using pointer = value_type*;
    using const_pointer = const value_type*;
    using size_type = std::size_t;
    using difference_type = std::ptrdiff_t;

    using iterator = ihmap_iterator<false, key_type, mapped_type, hasher, key_equal>;
    using const_iterator = ihmap_iterator<true, key_type, mapped_type, hasher, key_equal>;

private:
    static_assert(!std::is_reference_v<key_type>);
    static_assert(!std::is_reference_v<mapped_type>);

    static constexpr std::size_t group_size{group_type::group_size};
    // Number of empty slots per group that may be consumed before the table grows.
    static constexpr std::size_t average_max_group_load{group_size - 2};
    // Group index used by find_position() to report that the key is absent.
    static constexpr std::size_t not_found{std::numeric_limits<std::size_t>::max()};

    // Location of a slot: which group, and which slot within that group.
    struct position {
        std::size_t group_index;
        std::size_t slot_index;
    };

public:
    /// Creates an empty map with a single group.
    ihmap()
    {
        initialize_members(1);
    }

    /// Creates a map with the same group count as `other` and copies its elements.
    ihmap(const ihmap& other)
    {
        deep_copy(other);
    }

    /// Takes over the storage of `other`, leaving it without storage.
    ihmap(ihmap&& other)
        : group_index_mask{std::exchange(other.group_index_mask, 0)}
        , empty_slots{std::exchange(other.empty_slots, 0)}
        , full_slots{std::exchange(other.full_slots, 0)}
        , groups{std::move(other.groups)}
    {
    }

    /// Replaces the contents with a copy of `other`.
    ihmap& operator=(const ihmap& other)
    {
        if (this != &other) {
            // The slots never destroy their contents on their own, so the current
            // elements have to be destroyed before the storage is replaced.
            if (groups) {
                clear();
            }
            deep_copy(other);
        }
        return *this;
    }

    /// Replaces the contents with the storage of `other`, leaving it without storage.
    ihmap& operator=(ihmap&& other)
    {
        if (this != &other) {
            // See the copy assignment operator: destroy what we hold before letting go.
            if (groups) {
                clear();
            }
            group_index_mask = std::exchange(other.group_index_mask, 0);
            empty_slots = std::exchange(other.empty_slots, 0);
            full_slots = std::exchange(other.full_slots, 0);
            groups = std::move(other.groups);
        }
        return *this;
    }

    ~ihmap()
    {
        // A moved-from map has no storage and therefore nothing to destroy.
        if (!groups)
            return;

        clear();
    }

    [[nodiscard]] bool empty() const noexcept { return full_slots == 0; }
    size_type size() const noexcept { return full_slots; }
    size_type max_size() const noexcept { return static_cast<size_type>(std::numeric_limits<difference_type>::max()); }

    iterator begin()
    {
        iterator result{iterator_at({0, 0})};
        result.skip_empty_or_tombstone();
        return result;
    }
    iterator end()
    {
        return {};
    }
    const_iterator cbegin() const
    {
        const_iterator result{const_iterator_at({0, 0})};
        result.skip_empty_or_tombstone();
        return result;
    }
    const_iterator cend() const
    {
        return {};
    }
    const_iterator begin() const
    {
        return cbegin();
    }
    const_iterator end() const
    {
        return cend();
    }

    /// Inserts a value constructed from `args` under key `k` unless the key is already
    /// present. Returns an iterator to the element and whether an insertion happened.
    template<typename K = key_type, typename... Args>
    std::pair<iterator, bool> try_emplace(K&& k, Args&&... args)
    {
        auto [pos, item_found] = find_key_or_empty_slot(k);
        if (!item_found) {
            new (&groups[pos.group_index].slots[pos.slot_index].value) value_type(
                std::piecewise_construct,
                std::forward_as_tuple(std::forward<K>(k)),
                std::forward_as_tuple(std::forward<Args>(args)...));
        }
        return {iterator_at(pos), !item_found};
    }

    /// Assigns `v` to the element with key `k`, inserting it if it does not exist yet.
    /// Returns an iterator to the element and whether an insertion happened.
    template<typename K = key_type, typename V = mapped_type>
    std::pair<iterator, bool> insert_or_assign(K&& k, V&& v)
    {
        auto [pos, item_found] = find_key_or_empty_slot(k);
        if (item_found) {
            groups[pos.group_index].slots[pos.slot_index].value.second = std::forward<V>(v);
        } else {
            new (&groups[pos.group_index].slots[pos.slot_index].value) value_type(
                std::forward<K>(k),
                std::forward<V>(v));
        }
        return {iterator_at(pos), !item_found};
    }

    /// Removes the element `iter` refers to; erasing the end iterator does nothing.
    void erase(const_iterator iter)
    {
        if (iter == cend()) {
            return;
        }

        const std::size_t group_index{static_cast<std::size_t>(std::distance(groups.get(), iter.group_ptr))};

        erase_impl({group_index, iter.slot_index});
    }
    void erase(iterator iter)
    {
        if (iter == end()) {
            return;
        }

        const std::size_t group_index{static_cast<std::size_t>(std::distance(groups.get(), iter.group_ptr))};

        erase_impl({group_index, iter.slot_index});
    }

    /// Removes the element with the given key. Returns the number of elements removed
    /// (0 or 1).
    template<typename K = key_type>
    std::size_t erase(const K& key)
    {
        const position pos{find_position(key, hasher{}(key))};
        if (pos.group_index == not_found) {
            return 0;
        }

        erase_impl(pos);
        return 1;
    }

    /// Returns an iterator to the element with the given key, or end() if absent.
    template<typename K = key_type>
    iterator find(const K& key)
    {
        const position pos{find_position(key, hasher{}(key))};
        if (pos.group_index == not_found) {
            return {};
        }
        return iterator_at(pos);
    }
    template<typename K = key_type>
    const_iterator find(const K& key) const
    {
        const position pos{find_position(key, hasher{}(key))};
        if (pos.group_index == not_found) {
            return {};
        }
        return const_iterator_at(pos);
    }

    template<typename K = key_type>
    bool contains(const K& key) const
    {
        return find(key) != end();
    }
    template<typename K = key_type>
    std::size_t count(const K& key) const
    {
        return contains(key) ? 1 : 0;
    }

    /// Returns the value stored under `k`, inserting a value-initialised one if needed.
    template<typename K = key_type>
    mapped_type& operator[](K&& k)
    {
        return try_emplace(std::forward<K>(k)).first->second;
    }

    /// Returns the value stored under `k`; throws std::out_of_range if the key is absent.
    template<typename K = key_type>
    mapped_type& at(K&& k)
    {
        const auto iter{find(k)};
        if (iter == end()) {
            throw std::out_of_range("ihmap::at: key not found");
        }
        return iter->second;
    }
    template<typename K = key_type>
    const mapped_type& at(K&& k) const
    {
        const auto iter{find(k)};
        if (iter == end()) {
            throw std::out_of_range("ihmap::at: key not found");
        }
        return iter->second;
    }

    /// Destroys every element and resets all meta bytes; the group count is kept.
    void clear()
    {
        for (auto iter{begin()}; iter != end(); ++iter) {
            iter->~value_type();
        }

        clear_metadata();
    }

private:
    iterator iterator_at(position pos)
    {
        return {groups.get() + pos.group_index, pos.slot_index};
    }
    const_iterator const_iterator_at(position pos) const
    {
        return {groups.get() + pos.group_index, pos.slot_index};
    }

    /// Probes for `key`, whose hash is `hash`. Returns the position of the slot holding
    /// the key, or a position whose group_index is `not_found` once a group containing an
    /// empty slot has been reached.
    template<typename K>
    position find_position(const K& key, const std::size_t hash) const
    {
        const detail::meta_byte mb{detail::meta_byte_from_hash(hash)};
        std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};

        while (true) {
            const group_type& g{groups[group_index]};

            // Only slots whose meta byte matches the one derived from the hash need a
            // full key comparison. The last meta byte has no slot and is excluded.
            MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(g.meta.match(mb), {
                if (key_equal{}(g.slots[match_index].value.first, key)) [[likely]] {
                    return {group_index, match_index};
                }
            });

            if (g.meta.is_any_empty()) [[likely]] {
                return {not_found, 0};
            }

            group_index = (group_index + 1) & group_index_mask;
        }
    }

    /// Returns {position of the slot holding `key`, true} if the key is present;
    /// otherwise claims a slot for it and returns {position of the claimed slot, false}.
    /// A claimed slot is already marked as full and must be constructed by the caller.
    std::pair<position, bool> find_key_or_empty_slot(const key_type& key)
    {
        const std::size_t hash{hasher{}(key)};

        const position pos{find_position(key, hash)};
        if (pos.group_index != not_found) {
            return {pos, true};
        }
        return {find_empty_slot_to_insert(hash), false};
    }

    /// Claims the first empty or tombstone slot on the probe sequence of `hash`, growing
    /// the table first if the budget of empty slots is used up. The slot's meta byte is
    /// set to the one derived from `hash`; its position is returned.
    position find_empty_slot_to_insert(const std::size_t hash)
    {
        if (empty_slots == 0) [[unlikely]] {
            grow_and_rehash();
        }

        std::size_t group_index{detail::group_index_from_hash(hash, group_index_mask)};

        while (true) {
            group_type& g{groups[group_index]};

            MCL_HMAP_MATCH_META_BYTE_GROUP_EXCEPT_LAST(g.meta.match_empty_or_tombstone(), {
                // Reusing a tombstone does not consume any of the empty-slot budget.
                if (g.meta.get(match_index) == detail::meta_byte::empty) [[likely]] {
                    --empty_slots;
                }
                ++full_slots;

                g.meta.set(match_index, detail::meta_byte_from_hash(hash));

                return {group_index, match_index};
            });

            group_index = (group_index + 1) & group_index_mask;
        }
    }

    /// Destroys the element at `pos` and updates the bookkeeping. The slot becomes empty
    /// if its group already contains an empty slot, and a tombstone otherwise.
    void erase_impl(position pos)
    {
        group_type& g{groups[pos.group_index]};

        g.slots[pos.slot_index].value.~value_type();

        --full_slots;
        if (g.meta.is_any_empty()) {
            g.meta.set(pos.slot_index, detail::meta_byte::empty);
            ++empty_slots;
        } else {
            g.meta.set(pos.slot_index, detail::meta_byte::tombstone);
        }
    }

    /// Doubles the number of groups and re-inserts every element.
    void grow_and_rehash()
    {
        const std::size_t new_group_count{2 * (group_index_mask + 1)};

        pow2_resize(new_group_count);
    }

    /// Replaces the storage with `new_group_count` groups and moves every element over.
    void pow2_resize(std::size_t new_group_count)
    {
        // `iter` keeps pointing into the old storage, which stays alive in old_groups
        // until the end of this function.
        auto iter{begin()};

        const auto old_groups{std::move(groups)};

        initialize_members(new_group_count);

        for (; iter != end(); ++iter) {
            const std::size_t hash{hasher{}(iter->first)};
            const position pos{find_empty_slot_to_insert(hash)};

            new (&groups[pos.group_index].slots[pos.slot_index].value) value_type(std::move(iter.group_ptr->slots[iter.slot_index].value));
            iter.group_ptr->slots[iter.slot_index].value.~value_type();
        }
    }

    /// Replaces the storage with fresh storage of the same group count as `other` and
    /// copy-constructs every element of `other` into it. Elements currently held are not
    /// destroyed; callers that hold any must clear() first.
    void deep_copy(const ihmap& other)
    {
        initialize_members(other.group_index_mask + 1);

        // A moved-from map has no storage to iterate over.
        if (!other.groups) {
            return;
        }

        for (auto iter = other.begin(); iter != other.end(); ++iter) {
            const std::size_t hash{hasher{}(iter->first)};
            const position pos{find_empty_slot_to_insert(hash)};

            new (&groups[pos.group_index].slots[pos.slot_index].value) value_type(iter.group_ptr->slots[iter.slot_index].value);
        }
    }

    /// Allocates `group_count` groups and resets their meta bytes.
    /// `group_count` is used to derive a bit mask, so it has to be a non-zero power of two.
    void initialize_members(std::size_t group_count)
    {
        group_index_mask = group_count - 1;

        groups = std::unique_ptr<group_type[]>{new group_type[group_count]};

        clear_metadata();
    }

    /// Resets the counters and every group's meta bytes without touching the slots.
    void clear_metadata()
    {
        const std::size_t group_count{group_index_mask + 1};

        empty_slots = group_count * average_max_group_load;
        full_slots = 0;

        for (size_t i{0}; i < group_count; ++i) {
            groups[i].meta = detail::meta_byte_group{detail::ihmap_default_meta};
        }
        // The spare meta byte of the final group marks the end for iterators.
        groups[group_count - 1].meta.set(group_size, detail::meta_byte::end_sentinel);
    }

    std::size_t group_index_mask;  // group count minus one
    std::size_t empty_slots;       // empty slots that may still be claimed before growing
    std::size_t full_slots;        // number of elements
    std::unique_ptr<group_type[]> groups;
};
} // namespace mcl

View file

@ -21,7 +21,8 @@ class intrusive_list_iterator;
template<typename T> template<typename T>
class intrusive_list_node { class intrusive_list_node {
public: public:
bool is_sentinel() const { bool is_sentinel() const
{
return is_sentinel_; return is_sentinel_;
} }
@ -42,7 +43,8 @@ class intrusive_list_sentinel final : public intrusive_list_node<T> {
using intrusive_list_node<T>::is_sentinel_; using intrusive_list_node<T>::is_sentinel_;
public: public:
intrusive_list_sentinel() { intrusive_list_sentinel()
{
next = this; next = this;
prev = this; prev = this;
is_sentinel_ = true; is_sentinel_ = true;
@ -72,50 +74,56 @@ public:
intrusive_list_iterator& operator=(const intrusive_list_iterator& other) = default; intrusive_list_iterator& operator=(const intrusive_list_iterator& other) = default;
explicit intrusive_list_iterator(node_pointer list_node) explicit intrusive_list_iterator(node_pointer list_node)
: node(list_node) { : node(list_node) {}
}
explicit intrusive_list_iterator(pointer data) explicit intrusive_list_iterator(pointer data)
: node(data) { : node(data) {}
}
explicit intrusive_list_iterator(reference data) explicit intrusive_list_iterator(reference data)
: node(&data) { : node(&data) {}
}
intrusive_list_iterator& operator++() { intrusive_list_iterator& operator++()
{
node = node->next; node = node->next;
return *this; return *this;
} }
intrusive_list_iterator& operator--() { intrusive_list_iterator& operator--()
{
node = node->prev; node = node->prev;
return *this; return *this;
} }
intrusive_list_iterator operator++(int) { intrusive_list_iterator operator++(int)
{
intrusive_list_iterator it(*this); intrusive_list_iterator it(*this);
++*this; ++*this;
return it; return it;
} }
intrusive_list_iterator operator--(int) { intrusive_list_iterator operator--(int)
{
intrusive_list_iterator it(*this); intrusive_list_iterator it(*this);
--*this; --*this;
return it; return it;
} }
bool operator==(const intrusive_list_iterator& other) const { bool operator==(const intrusive_list_iterator& other) const
{
return node == other.node; return node == other.node;
} }
bool operator!=(const intrusive_list_iterator& other) const { bool operator!=(const intrusive_list_iterator& other) const
{
return !operator==(other); return !operator==(other);
} }
reference operator*() const { reference operator*() const
{
DEBUG_ASSERT(!node->is_sentinel()); DEBUG_ASSERT(!node->is_sentinel());
return static_cast<reference>(*node); return static_cast<reference>(*node);
} }
pointer operator->() const { pointer operator->() const
{
return std::addressof(operator*()); return std::addressof(operator*());
} }
node_pointer AsNodePointer() const { node_pointer AsNodePointer() const
{
return node; return node;
} }
@ -145,7 +153,8 @@ public:
* @param location The location to insert the node. * @param location The location to insert the node.
* @param new_node The node to add. * @param new_node The node to add.
*/ */
iterator insert(iterator location, pointer new_node) { iterator insert(iterator location, pointer new_node)
{
return insert_before(location, new_node); return insert_before(location, new_node);
} }
@ -156,7 +165,8 @@ public:
* @param location The location to insert the new node. * @param location The location to insert the new node.
* @param new_node The node to insert into the list. * @param new_node The node to insert into the list.
*/ */
iterator insert_before(iterator location, pointer new_node) { iterator insert_before(iterator location, pointer new_node)
{
auto existing_node = location.AsNodePointer(); auto existing_node = location.AsNodePointer();
new_node->next = existing_node; new_node->next = existing_node;
@ -173,7 +183,8 @@ public:
* @param position Location to insert the node in front of. * @param position Location to insert the node in front of.
* @param new_node The node to be inserted into the list. * @param new_node The node to be inserted into the list.
*/ */
iterator insert_after(iterator position, pointer new_node) { iterator insert_after(iterator position, pointer new_node)
{
if (empty()) if (empty())
return insert(begin(), new_node); return insert(begin(), new_node);
@ -184,7 +195,8 @@ public:
* Add an entry to the start of the list. * Add an entry to the start of the list.
* @param node Node to add to the list. * @param node Node to add to the list.
*/ */
void push_front(pointer node) { void push_front(pointer node)
{
insert(begin(), node); insert(begin(), node);
} }
@ -192,7 +204,8 @@ public:
* Add an entry to the end of the list * Add an entry to the end of the list
* @param node Node to add to the list. * @param node Node to add to the list.
*/ */
void push_back(pointer node) { void push_back(pointer node)
{
insert(end(), node); insert(end(), node);
} }
@ -200,7 +213,8 @@ public:
* Erases the node at the front of the list. * Erases the node at the front of the list.
* @note Must not be called on an empty list. * @note Must not be called on an empty list.
*/ */
void pop_front() { void pop_front()
{
DEBUG_ASSERT(!empty()); DEBUG_ASSERT(!empty());
erase(begin()); erase(begin());
} }
@ -209,7 +223,8 @@ public:
* Erases the node at the back of the list. * Erases the node at the back of the list.
* @note Must not be called on an empty list. * @note Must not be called on an empty list.
*/ */
void pop_back() { void pop_back()
{
DEBUG_ASSERT(!empty()); DEBUG_ASSERT(!empty());
erase(--end()); erase(--end());
} }
@ -218,7 +233,8 @@ public:
* Removes a node from this list * Removes a node from this list
* @param it An iterator that points to the node to remove from list. * @param it An iterator that points to the node to remove from list.
*/ */
pointer remove(iterator& it) { pointer remove(iterator& it)
{
DEBUG_ASSERT(it != end()); DEBUG_ASSERT(it != end());
pointer node = &*it++; pointer node = &*it++;
@ -237,7 +253,8 @@ public:
* Removes a node from this list * Removes a node from this list
* @param it A constant iterator that points to the node to remove from list. * @param it A constant iterator that points to the node to remove from list.
*/ */
pointer remove(const iterator& it) { pointer remove(const iterator& it)
{
iterator copy = it; iterator copy = it;
return remove(copy); return remove(copy);
} }
@ -246,7 +263,8 @@ public:
* Removes a node from this list. * Removes a node from this list.
* @param node A pointer to the node to remove. * @param node A pointer to the node to remove.
*/ */
pointer remove(pointer node) { pointer remove(pointer node)
{
return remove(iterator(node)); return remove(iterator(node));
} }
@ -254,7 +272,8 @@ public:
* Removes a node from this list. * Removes a node from this list.
* @param node A reference to the node to remove. * @param node A reference to the node to remove.
*/ */
pointer remove(reference node) { pointer remove(reference node)
{
return remove(iterator(node)); return remove(iterator(node));
} }
@ -262,7 +281,8 @@ public:
* Is this list empty? * Is this list empty?
* @returns true if there are no nodes in this list. * @returns true if there are no nodes in this list.
*/ */
bool empty() const { bool empty() const
{
return root->next == root.get(); return root->next == root.get();
} }
@ -270,7 +290,8 @@ public:
* Gets the total number of elements within this list. * Gets the total number of elements within this list.
* @return the number of elements in this list. * @return the number of elements in this list.
*/ */
size_type size() const { size_type size() const
{
return static_cast<size_type>(std::distance(begin(), end())); return static_cast<size_type>(std::distance(begin(), end()));
} }
@ -278,7 +299,8 @@ public:
* Retrieves a reference to the node at the front of the list. * Retrieves a reference to the node at the front of the list.
* @note Must not be called on an empty list. * @note Must not be called on an empty list.
*/ */
reference front() { reference front()
{
DEBUG_ASSERT(!empty()); DEBUG_ASSERT(!empty());
return *begin(); return *begin();
} }
@ -287,7 +309,8 @@ public:
* Retrieves a constant reference to the node at the front of the list. * Retrieves a constant reference to the node at the front of the list.
* @note Must not be called on an empty list. * @note Must not be called on an empty list.
*/ */
const_reference front() const { const_reference front() const
{
DEBUG_ASSERT(!empty()); DEBUG_ASSERT(!empty());
return *begin(); return *begin();
} }
@ -296,7 +319,8 @@ public:
* Retrieves a reference to the node at the back of the list. * Retrieves a reference to the node at the back of the list.
* @note Must not be called on an empty list. * @note Must not be called on an empty list.
*/ */
reference back() { reference back()
{
DEBUG_ASSERT(!empty()); DEBUG_ASSERT(!empty());
return *--end(); return *--end();
} }
@ -305,7 +329,8 @@ public:
* Retrieves a constant reference to the node at the back of the list. * Retrieves a constant reference to the node at the back of the list.
* @note Must not be called on an empty list. * @note Must not be called on an empty list.
*/ */
const_reference back() const { const_reference back() const
{
DEBUG_ASSERT(!empty()); DEBUG_ASSERT(!empty());
return *--end(); return *--end();
} }
@ -331,7 +356,8 @@ public:
* Erases a node from the list, indicated by an iterator. * Erases a node from the list, indicated by an iterator.
* @param it The iterator that points to the node to erase. * @param it The iterator that points to the node to erase.
*/ */
iterator erase(iterator it) { iterator erase(iterator it)
{
remove(it); remove(it);
return it; return it;
} }
@ -340,7 +366,8 @@ public:
* Erases a node from this list. * Erases a node from this list.
* @param node A pointer to the node to erase from this list. * @param node A pointer to the node to erase from this list.
*/ */
iterator erase(pointer node) { iterator erase(pointer node)
{
return erase(iterator(node)); return erase(iterator(node));
} }
@ -348,7 +375,8 @@ public:
* Erases a node from this list. * Erases a node from this list.
* @param node A reference to the node to erase from this list. * @param node A reference to the node to erase from this list.
*/ */
iterator erase(reference node) { iterator erase(reference node)
{
return erase(iterator(node)); return erase(iterator(node));
} }
@ -356,7 +384,8 @@ public:
* Exchanges contents of this list with another list instance. * Exchanges contents of this list with another list instance.
* @param other The other list to swap with. * @param other The other list to swap with.
*/ */
void swap(intrusive_list& other) noexcept { void swap(intrusive_list& other) noexcept
{
root.swap(other.root); root.swap(other.root);
} }
@ -371,7 +400,8 @@ private:
* @param rhs The second list. * @param rhs The second list.
*/ */
template<typename T> template<typename T>
void swap(intrusive_list<T>& lhs, intrusive_list<T>& rhs) noexcept { void swap(intrusive_list<T>& lhs, intrusive_list<T>& rhs) noexcept
{
lhs.swap(rhs); lhs.swap(rhs);
} }

View file

@ -0,0 +1,32 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
// Reference: http://jonkagstrom.com/bit-mixer-construction/
#pragma once
#include <functional>
#include "mcl/bit/rotate.hpp"
#include "mcl/stdint.hpp"
namespace mcl::hash {
// Bit mixer: one xor-shift, one multiply by an odd constant, then an xor with
// two right-rotations of the value (see the bit-mixer reference cited above).
// Returns the mixed value; the input is taken by value and not modified.
// NOTE(review): the shift by 32 and the 64-bit multiplier look like they assume
// a 64-bit size_t - confirm that 32-bit hosts are not a supported target.
constexpr size_t xmrx(size_t x)
{
// Fold the upper half into the lower half.
x ^= x >> 32;
x *= 0xff51afd7ed558ccd;
// Spread the multiplied bits back across the word.
x ^= bit::rotate_right(x, 47) ^ bit::rotate_right(x, 23);
return x;
}
// Hash functor that post-processes std::hash<T> with xmrx.
//
// The call operator is const-qualified so the functor can be invoked through a
// const hasher object, as the standard Hash requirement (and therefore
// std::unordered_map and similar containers) expects. Existing non-const call
// sites keep working unchanged.
template<typename T>
struct avalanche_xmrx {
    // Returns xmrx applied to the std::hash<T> value of `value`.
    size_t operator()(const T& value) const
    {
        return xmrx(std::hash<T>{}(value));
    }
};
} // namespace mcl::hash

View file

@ -13,12 +13,14 @@ template<typename T>
struct reverse_adapter { struct reverse_adapter {
T& iterable; T& iterable;
constexpr auto begin() { constexpr auto begin()
{
using namespace std; using namespace std;
return rbegin(iterable); return rbegin(iterable);
} }
constexpr auto end() { constexpr auto end()
{
using namespace std; using namespace std;
return rend(iterable); return rend(iterable);
} }
@ -27,7 +29,8 @@ struct reverse_adapter {
} // namespace detail } // namespace detail
template<typename T> template<typename T>
constexpr detail::reverse_adapter<T> reverse(T&& iterable) { constexpr detail::reverse_adapter<T> reverse(T&& iterable)
{
return detail::reverse_adapter<T>{iterable}; return detail::reverse_adapter<T>{iterable};
} }

View file

@ -0,0 +1,46 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <limits>
#include <memory>
#include <type_traits>

#ifdef _MSC_VER
#    include <malloc.h>
#else
#    include <cstdlib>
#endif
namespace mcl {

namespace detail {

// Deleter matching the allocation functions used by
// make_overaligned_unique_ptr_array: _aligned_free on MSVC, std::free elsewhere.
struct aligned_alloc_deleter {
    // Releases `p`. Constness is cast away because the underlying C release
    // functions take a pointer to non-const.
    template<typename T>
    void operator()(T* p) const
    {
#ifdef _MSC_VER
        _aligned_free(const_cast<std::remove_const_t<T>*>(p));
#else
        std::free(const_cast<std::remove_const_t<T>*>(p));
#endif
    }
};

}  // namespace detail

// unique_ptr whose storage is released through aligned_alloc_deleter. The
// alignment parameter is part of the alias signature only; it does not affect
// the resulting type.
template<size_t, typename T>
using overaligned_unique_ptr = std::unique_ptr<T, detail::aligned_alloc_deleter>;

// Allocates raw storage for `element_count` objects of type T, aligned to
// `alignment` bytes, with the byte size rounded up to a multiple of `alignment`.
//
// No constructors are run on the storage. The returned pointer is null when the
// allocator fails, and also when the requested byte size cannot be represented
// in size_t. Previously that second case wrapped around and silently produced
// an undersized buffer.
template<size_t alignment, typename T>
auto make_overaligned_unique_ptr_array(size_t element_count)
{
    using result_type = overaligned_unique_ptr<alignment, T[]>;

    // Reject counts for which `element_count * sizeof(T) + alignment - 1` would overflow.
    constexpr size_t max_size = std::numeric_limits<size_t>::max();
    if (element_count > (max_size - (alignment - 1)) / sizeof(T)) {
        return result_type{};
    }

    const size_t min_size = element_count * sizeof(T);
    const size_t alloc_size = (min_size + alignment - 1) / alignment * alignment;
#ifdef _MSC_VER
    return result_type{static_cast<T*>(_aligned_malloc(alloc_size, alignment))};
#else
    return result_type{static_cast<T*>(std::aligned_alloc(alignment, alloc_size))};
#endif
}

}  // namespace mcl

View file

@ -21,7 +21,9 @@ class scope_exit final {
public: public:
explicit scope_exit(Function&& fn) explicit scope_exit(Function&& fn)
: function(std::move(fn)) {} : function(std::move(fn)) {}
~scope_exit() noexcept {
~scope_exit() noexcept
{
function(); function();
} }
@ -34,7 +36,9 @@ class scope_fail final {
public: public:
explicit scope_fail(Function&& fn) explicit scope_fail(Function&& fn)
: function(std::move(fn)), exception_count(std::uncaught_exceptions()) {} : function(std::move(fn)), exception_count(std::uncaught_exceptions()) {}
~scope_fail() noexcept {
~scope_fail() noexcept
{
if (std::uncaught_exceptions() > exception_count) { if (std::uncaught_exceptions() > exception_count) {
function(); function();
} }
@ -50,7 +54,9 @@ class scope_success final {
public: public:
explicit scope_success(Function&& fn) explicit scope_success(Function&& fn)
: function(std::move(fn)), exception_count(std::uncaught_exceptions()) {} : function(std::move(fn)), exception_count(std::uncaught_exceptions()) {}
~scope_success() {
~scope_success()
{
if (std::uncaught_exceptions() <= exception_count) { if (std::uncaught_exceptions() <= exception_count) {
function(); function();
} }
@ -64,17 +70,20 @@ private:
// We use ->* here as it has the highest precedence of the operators we can use. // We use ->* here as it has the highest precedence of the operators we can use.
template<typename Function> template<typename Function>
auto operator->*(scope_exit_tag, Function&& function) { auto operator->*(scope_exit_tag, Function&& function)
{
return scope_exit<std::decay_t<Function>>{std::forward<Function>(function)}; return scope_exit<std::decay_t<Function>>{std::forward<Function>(function)};
} }
template<typename Function> template<typename Function>
auto operator->*(scope_fail_tag, Function&& function) { auto operator->*(scope_fail_tag, Function&& function)
{
return scope_fail<std::decay_t<Function>>{std::forward<Function>(function)}; return scope_fail<std::decay_t<Function>>{std::forward<Function>(function)};
} }
template<typename Function> template<typename Function>
auto operator->*(scope_success_tag, Function&& function) { auto operator->*(scope_success_tag, Function&& function)
{
return scope_success<std::decay_t<Function>>{std::forward<Function>(function)}; return scope_success<std::decay_t<Function>>{std::forward<Function>(function)};
} }

View file

@ -11,8 +11,9 @@
namespace mcl::detail { namespace mcl::detail {
[[noreturn]] void assert_terminate_impl(fmt::string_view msg, fmt::format_args args) { [[noreturn]] void assert_terminate_impl(const char* expr_str, fmt::string_view msg, fmt::format_args args)
fmt::print(stderr, "assertion failed: "); {
fmt::print(stderr, "assertion failed: {}\nMessage:", expr_str);
fmt::vprint(stderr, msg, args); fmt::vprint(stderr, msg, args);
std::fflush(stderr); std::fflush(stderr);
std::terminate(); std::terminate();

View file

@ -1,13 +1,14 @@
add_executable(mcl-tests add_executable(mcl-tests
bit/bit_field_tests.cpp bit/bit_field_tests.cpp
main.cpp container/hmap.cpp
container/ihmap.cpp
mp/metavalue_tests.cpp mp/metavalue_tests.cpp
mp/typelist_tests.cpp mp/typelist_tests.cpp
type_traits/type_traits_tests.cpp type_traits/type_traits_tests.cpp
) )
target_include_directories(mcl-tests PUBLIC .) target_include_directories(mcl-tests PUBLIC .)
target_compile_options(mcl-tests PRIVATE ${STAMINA_CXX_FLAGS}) target_compile_options(mcl-tests PRIVATE ${STAMINA_CXX_FLAGS})
target_link_libraries(mcl-tests PRIVATE Catch2::Catch2 mcl) target_link_libraries(mcl-tests PRIVATE Catch2::Catch2WithMain mcl)
include(CTest) include(CTest)
include(Catch) include(Catch)

View file

@ -5,11 +5,12 @@
#include <array> #include <array>
#include <tuple> #include <tuple>
#include <catch2/catch.hpp> #include <catch2/catch_test_macros.hpp>
#include <mcl/bit/bit_field.hpp> #include <mcl/bit/bit_field.hpp>
#include <mcl/stdint.hpp> #include <mcl/stdint.hpp>
TEST_CASE("mcl::bit::ones", "[bit]") { TEST_CASE("mcl::bit::ones", "[bit]")
{
const std::array cases{ const std::array cases{
std::make_tuple<size_t, u8>(0, 0x00), std::make_tuple<size_t, u8>(0, 0x00),
std::make_tuple<size_t, u8>(1, 0x01), std::make_tuple<size_t, u8>(1, 0x01),

View file

@ -0,0 +1,66 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
#include <unordered_map>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include <mcl/container/hmap.hpp>
#include <mcl/stdint.hpp>
// Exercises mcl::hmap<u64, u64> with `count` keys, each mapped to twice its value:
// insertion, lookup, find, try_emplace on existing keys, iteration and clear.
TEST_CASE("mcl::hmap", "[hmap]")
{
mcl::hmap<u64, u64> double_map;
constexpr int count = 100000;
REQUIRE(double_map.empty());
// Insert through operator[]; the size must grow by one per new key.
for (int i = 0; i < count; ++i) {
double_map[i] = i * 2;
REQUIRE(double_map.size() == i + 1);
}
// Every inserted key is readable through operator[] and reported by contains().
for (int i = 0; i < count; ++i) {
REQUIRE(double_map[i] == i * 2);
REQUIRE(double_map.contains(i));
}
// find() yields an iterator to the matching key/value pair.
for (int i = 0; i < count; ++i) {
auto iter = double_map.find(i);
REQUIRE(iter->first == i);
REQUIRE(iter->second == i * 2);
}
// Keys that were never inserted are not reported.
for (int i = count; i < count * 2; ++i) {
REQUIRE(!double_map.contains(i));
}
// try_emplace on an existing key must report that nothing was inserted.
for (int i = 0; i < count; ++i) {
auto result = double_map.try_emplace(i, 0);
REQUIRE(!result.second);
}
// Iteration sees the originally stored values.
for (auto [k, v] : double_map) {
REQUIRE(k * 2 == v);
}
// Iteration visits each key exactly once.
std::unordered_map<u64, size_t> indexes_count;
for (auto [k, v] : double_map) {
(void)v;
indexes_count[k]++;
}
for (auto [k, v] : indexes_count) {
(void)k;
REQUIRE(v == 1);
}
// clear() empties the map; iterating afterwards must not visit anything.
REQUIRE(!double_map.empty());
double_map.clear();
REQUIRE(double_map.empty());
for (auto [k, v] : double_map) {
REQUIRE(false);
}
}

View file

@ -0,0 +1,66 @@
// This file is part of the mcl project.
// Copyright (c) 2022 merryhime
// SPDX-License-Identifier: MIT
#include <unordered_map>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include <mcl/container/ihmap.hpp>
#include <mcl/stdint.hpp>
// Exercises mcl::ihmap<u64, u64> with `count` keys, each mapped to twice its value:
// insertion, lookup, find, try_emplace on existing keys, iteration and clear.
TEST_CASE("mcl::ihmap", "[ihmap]")
{
mcl::ihmap<u64, u64> double_map;
constexpr int count = 100000;
REQUIRE(double_map.empty());
// Insert through operator[]; the size must grow by one per new key.
for (int i = 0; i < count; ++i) {
double_map[i] = i * 2;
REQUIRE(double_map.size() == i + 1);
}
// Every inserted key is readable through operator[] and reported by contains().
for (int i = 0; i < count; ++i) {
REQUIRE(double_map[i] == i * 2);
REQUIRE(double_map.contains(i));
}
// find() yields an iterator to the matching key/value pair.
for (int i = 0; i < count; ++i) {
auto iter = double_map.find(i);
REQUIRE(iter->first == i);
REQUIRE(iter->second == i * 2);
}
// Keys that were never inserted are not reported.
for (int i = count; i < count * 2; ++i) {
REQUIRE(!double_map.contains(i));
}
// try_emplace on an existing key must report that nothing was inserted.
for (int i = 0; i < count; ++i) {
auto result = double_map.try_emplace(i, 0);
REQUIRE(!result.second);
}
// Iteration sees the originally stored values.
for (auto [k, v] : double_map) {
REQUIRE(k * 2 == v);
}
// Iteration visits each key exactly once.
std::unordered_map<u64, size_t> indexes_count;
for (auto [k, v] : double_map) {
(void)v;
indexes_count[k]++;
}
for (auto [k, v] : indexes_count) {
(void)k;
REQUIRE(v == 1);
}
// clear() empties the map; iterating afterwards must not visit anything.
REQUIRE(!double_map.empty());
double_map.clear();
REQUIRE(double_map.empty());
for (auto [k, v] : double_map) {
REQUIRE(false);
}
}

View file

@ -0,0 +1,218 @@
---
Language: Cpp
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: AlignAfterOperator
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
AttributeMacros:
- __capability
BinPackArguments: true
BinPackParameters: false
BitFieldColonSpacing: Both
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: true
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Custom
BreakBeforeConceptDeclarations: true
BreakBeforeTernaryOperators: true
BreakBeforeInheritanceComma: false
BreakConstructorInitializersBeforeComma: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
# EmptyLineAfterAccessModifier: Leave
EmptyLineBeforeAccessModifier: Always
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<mach/'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<windows.h>'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
Priority: 1
SortPriority: 0
CaseSensitive: false
- Regex: '^<([^\.])*>$'
Priority: 2
SortPriority: 0
CaseSensitive: false
- Regex: '^<.*\.'
Priority: 3
SortPriority: 0
CaseSensitive: false
- Regex: '.*'
Priority: 4
SortPriority: 0
CaseSensitive: false
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
# IndentAccessModifiers: false
IndentCaseBlocks: false
IndentCaseLabels: false
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentRequires: false
IndentWidth: 4
IndentWrappedFunctionNames: false
# InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
NamespaceMacros:
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
- ParseTestProto
- ParsePartialTestProto
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
# ShortNamespaceLines: 5
SortIncludes: true
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
# SpacesInLineCommentPrefix: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Latest
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 4
TypenameMacros:
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
- NS_SWIFT_NAME
- CF_SWIFT_NAME
- FCODE
- ICODE
...

View file

@ -0,0 +1,38 @@
# Builds oaknut and runs its test binary inside an aarch64 Ubuntu environment
# provided by uraimo/run-on-arch-action.
on: [push, pull_request]

jobs:
  test_on_ubuntu:
    runs-on: ubuntu-latest
    name: Build on ${{ matrix.distro }} ${{ matrix.arch }}

    strategy:
      matrix:
        include:
          - arch: aarch64
            distro: ubuntu_latest

    steps:
      - uses: actions/checkout@v3
      - uses: uraimo/run-on-arch-action@v2
        name: Build and Test
        id: build
        with:
          arch: ${{ matrix.arch }}
          distro: ${{ matrix.distro }}
          shell: /bin/bash

          # Catch2 is built from source and installed because the project
          # requires Catch2 v3 (find_package(Catch2 3 REQUIRED)).
          install: |
            apt-get update -q -y
            apt-get install -q -y make cmake g++ git
            pushd /tmp
            git clone --depth 1 https://github.com/catchorg/Catch2.git
            cd Catch2
            cmake -S . -B build -DBUILD_TESTING=OFF
            cmake --build build/ --target install
            popd

          run: |
            cmake -S . -B build
            cmake --build build
            ./build/oaknut-tests

View file

@ -0,0 +1,4 @@
.DS_Store
a.out
work/
*build*/

View file

@ -0,0 +1,57 @@
# The cxx_std_20 compile feature used below was introduced in CMake 3.12.
cmake_minimum_required(VERSION 3.12)
project(oaknut LANGUAGES CXX VERSION 0.0.0)

# Determine if we're built as a subproject (using add_subdirectory)
# or if this is the master project.
set(MASTER_PROJECT OFF)
if ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}")
    set(MASTER_PROJECT ON)
endif()

# Disable in-source builds
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
    # FATAL_ERROR stops configuration immediately, so nothing further is
    # written into the source tree.
    message(FATAL_ERROR "In-source builds are not allowed.")
endif()

# Source project files
set(header_files
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_mnemonics.inc.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/fpsimd_mnemonics.inc.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/multi_typed_name.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/offset.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/string_literal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut.hpp
)

# Library definition (header-only, so everything is an INTERFACE requirement)
add_library(oaknut INTERFACE)
add_library(merry::oaknut ALIAS oaknut)
target_sources(oaknut INTERFACE "$<BUILD_INTERFACE:${header_files}>")
target_include_directories(oaknut INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_compile_features(oaknut INTERFACE cxx_std_20)

# Tests (only when oaknut is the top-level project)
if (MASTER_PROJECT)
    find_package(Catch2 3 REQUIRED)

    add_executable(oaknut-tests
        tests/basic.cpp
        tests/fpsimd.cpp
        tests/general.cpp
    )
    # Nothing links against an executable, so its include path is PRIVATE.
    target_include_directories(oaknut-tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests)
    target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut)
    # These are GCC/Clang-style warning flags; do not pass them to MSVC.
    target_compile_options(oaknut-tests PRIVATE
        "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall;-Wextra;-Wcast-qual;-pedantic;-pedantic-errors;-Wfatal-errors;-Wno-missing-braces>"
    )

    include(CTest)
    include(Catch)
    catch_discover_tests(oaknut-tests)
    enable_testing()
endif()

21
externals/dynarmic/externals/oaknut/LICENSE vendored Executable file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 merryhime <https://mary.rs>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

29
externals/dynarmic/externals/oaknut/README.md vendored Executable file
View file

@ -0,0 +1,29 @@
# Oaknut
*A C++20 assembler for AArch64 (ARMv8.0)*
Oaknut is a header-only library that allows one to dynamically assemble code in-memory at runtime.
## Usage
Simple example:
```cpp
using EmittedFunction = int (*)();
EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value)
{
using namespace oaknut::util;
EmittedFunction result = code.ptr<EmittedFunction>();
code.MOVZ(W0, value);
code.RET();
return result;
}
```
## License
This project is [MIT licensed](LICENSE).

View file

@ -0,0 +1,124 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <cstddef>
#include <cstdint>
#include <new>
#if defined(_WIN32)
# include <windows.h>
#elif defined(__APPLE__)
# include <libkern/OSCacheControl.h>
# include <pthread.h>
# include <sys/mman.h>
# include <unistd.h>
#else
# include <sys/mman.h>
#endif
namespace oaknut {
// Owns a fixed-size region of memory mapped readable, writable and executable,
// intended to hold emitted machine code. Neither copyable nor movable.
class CodeBlock {
public:
    // Maps `size` bytes of read/write/execute memory.
    // Throws std::bad_alloc when the operating system refuses the mapping.
    explicit CodeBlock(std::size_t size)
        : m_size(size)
    {
#if defined(_WIN32)
        void* const mem = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
        // VirtualAlloc signals failure with a null pointer.
        if (mem == nullptr)
            throw std::bad_alloc{};
#else
#    if defined(__APPLE__)
        const int flags = MAP_ANON | MAP_PRIVATE | MAP_JIT;
#    else
        const int flags = MAP_ANON | MAP_PRIVATE;
#    endif
        void* const mem = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
        // mmap signals failure with MAP_FAILED, not with a null pointer, so a
        // plain null check would let a failed mapping go unnoticed.
        if (mem == MAP_FAILED || mem == nullptr)
            throw std::bad_alloc{};
#endif
        m_memory = static_cast<std::uint32_t*>(mem);
    }

    // Releases the mapping created by the constructor.
    ~CodeBlock()
    {
        if (m_memory == nullptr)
            return;

#if defined(_WIN32)
        VirtualFree((void*)m_memory, 0, MEM_RELEASE);
#else
        munmap(m_memory, m_size);
#endif
    }

    CodeBlock(const CodeBlock&) = delete;
    CodeBlock& operator=(const CodeBlock&) = delete;
    CodeBlock(CodeBlock&&) = delete;
    CodeBlock& operator=(CodeBlock&&) = delete;

    // Start of the mapped region, viewed as 32-bit words.
    std::uint32_t* ptr() const
    {
        return m_memory;
    }

    // On Apple platforms calls pthread_jit_write_protect_np(1); does nothing elsewhere.
    void protect()
    {
#if defined(__APPLE__)
        pthread_jit_write_protect_np(1);
#endif
    }

    // On Apple platforms calls pthread_jit_write_protect_np(0); does nothing elsewhere.
    void unprotect()
    {
#if defined(__APPLE__)
        pthread_jit_write_protect_np(0);
#endif
    }

    // Makes `size` bytes starting at `mem` coherent between the data and
    // instruction caches: sys_icache_invalidate on Apple platforms, otherwise
    // explicit AArch64 cache maintenance instructions.
    void invalidate(std::uint32_t* mem, std::size_t size)
    {
#if defined(__APPLE__)
        sys_icache_invalidate(mem, size);
#else
        // The smallest line sizes observed so far are remembered across calls.
        // NOTE(review): presumably so that a later call on a core reporting a
        // smaller line size never steps over lines - confirm the intent.
        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

        std::uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0"
                         : "=r"(ctr));

        // Line sizes in bytes are derived as 4 << field, from bits [3:0]
        // (instruction cache) and bits [19:16] (data cache) of CTR_EL0.
        // Plain comparisons are used instead of std::min, which this header
        // called without including <algorithm>.
        const std::size_t reported_isize = std::size_t{4} << ((ctr >> 0) & 0xf);
        const std::size_t reported_dsize = std::size_t{4} << ((ctr >> 16) & 0xf);
        if (reported_isize < icache_line_size)
            icache_line_size = reported_isize;
        if (reported_dsize < dcache_line_size)
            dcache_line_size = reported_dsize;
        const std::size_t isize = icache_line_size;
        const std::size_t dsize = dcache_line_size;

        const std::uintptr_t end = (std::uintptr_t)mem + size;

        // Clean every data cache line covering the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
            __asm__ volatile("dc cvau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\n"
                         :
                         :
                         : "memory");

        // Invalidate every instruction cache line covering the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
            __asm__ volatile("ic ivau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\nisb\n"
                         :
                         :
                         : "memory");
#endif
    }

    // Applies invalidate() to the whole mapped region.
    void invalidate_all()
    {
        invalidate(m_memory, m_size);
    }

protected:
    std::uint32_t* m_memory = nullptr;
    std::size_t m_size = 0;
};
} // namespace oaknut

View file

@ -0,0 +1,155 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// Software parallel bit deposit: the low bits of `val` are scattered, in order,
// into the positions of the set bits of `mask_` (lowest set bit first).
// Bits of `val` beyond the number of set bits in `mask_` are ignored.
template<std::uint32_t mask_>
static constexpr std::uint32_t pdep(std::uint32_t val)
{
    std::uint32_t remaining = mask_;
    std::uint32_t deposited = 0;
    std::uint32_t source_bit = 1;

    while (remaining != 0) {
        // Isolate the lowest mask bit that has not been consumed yet.
        const std::uint32_t lowest = remaining & (~remaining + 1);
        if ((val & source_bit) != 0)
            deposited |= lowest;
        remaining ^= lowest;
        source_bit <<= 1;
    }

    return deposited;
}
// OAKNUT_STD_ENCODE(TYPE, ACCESS, SIZE) defines an `encode<splat>(TYPE v)`
// overload that:
//   - statically requires `splat` to have exactly SIZE set bits, and
//   - deposits the value of the expression ACCESS (which may refer to `v`),
//     converted to std::uint32_t, into those bit positions via pdep.
// The list below instantiates it for each operand type together with the
// width of its encoded field.
#define OAKNUT_STD_ENCODE(TYPE, ACCESS, SIZE) \
template<std::uint32_t splat> \
std::uint32_t encode(TYPE v) \
{ \
static_assert(std::popcount(splat) == SIZE); \
return pdep<splat>(static_cast<std::uint32_t>(ACCESS)); \
}
// Register operands: only the low 5 bits of the index are encoded.
OAKNUT_STD_ENCODE(RReg, v.index() & 31, 5)
OAKNUT_STD_ENCODE(VReg, v.index() & 31, 5)
OAKNUT_STD_ENCODE(VRegArranged, v.index() & 31, 5)
// Immediate operands: their pre-computed m_encoded field is deposited as-is.
OAKNUT_STD_ENCODE(AddSubImm, v.m_encoded, 13)
OAKNUT_STD_ENCODE(BitImm32, v.m_encoded, 12)
OAKNUT_STD_ENCODE(BitImm64, v.m_encoded, 13)
OAKNUT_STD_ENCODE(LslShift<32>, v.m_encoded, 12)
OAKNUT_STD_ENCODE(LslShift<64>, v.m_encoded, 12)
OAKNUT_STD_ENCODE(FImm8, v.m_encoded, 8)
OAKNUT_STD_ENCODE(RepImm, v.m_encoded, 8)
// Remaining operand types: the value itself is converted to std::uint32_t.
OAKNUT_STD_ENCODE(Cond, v, 4)
OAKNUT_STD_ENCODE(AddSubExt, v, 3)
OAKNUT_STD_ENCODE(IndexExt, v, 3)
OAKNUT_STD_ENCODE(AddSubShift, v, 2)
OAKNUT_STD_ENCODE(LogShift, v, 2)
OAKNUT_STD_ENCODE(PstateField, v, 6)
OAKNUT_STD_ENCODE(SystemReg, v, 15)
OAKNUT_STD_ENCODE(AtOp, v, 7)
OAKNUT_STD_ENCODE(BarrierOp, v, 4)
OAKNUT_STD_ENCODE(DcOp, v, 10)
OAKNUT_STD_ENCODE(IcOp, v, 10)
OAKNUT_STD_ENCODE(PrfOp, v, 5)
OAKNUT_STD_ENCODE(TlbiOp, v, 10)
// Encodes a MovImm16 into a field of either 17 or 18 bits (any other width is
// rejected at compile time). For the 17-bit form the encoded value must fit in
// 17 bits, otherwise the string literal "invalid MovImm16" is thrown.
// NOTE(review): presumably the 17-bit form belongs to the 32-bit instruction
// variants, which have one shift bit fewer - confirm against the mnemonics.
template<std::uint32_t splat>
std::uint32_t encode(MovImm16 v)
{
static_assert(std::popcount(splat) == 17 || std::popcount(splat) == 18);
if constexpr (std::popcount(splat) == 17) {
constexpr std::uint32_t mask = (1 << std::popcount(splat)) - 1;
// Reject encodings that use bits above the 17-bit field.
if ((v.m_encoded & mask) != v.m_encoded)
throw "invalid MovImm16";
}
return pdep<splat>(v.m_encoded);
}
template<std::uint32_t splat, std::size_t imm_size>
std::uint32_t encode(Imm<imm_size> v)
{
static_assert(std::popcount(splat) >= imm_size);
return pdep<splat>(v.value());
}
template<std::uint32_t splat, int A, int B>
std::uint32_t encode(ImmChoice<A, B> v)
{
static_assert(std::popcount(splat) == 1);
return pdep<splat>(v.m_encoded);
}
template<std::uint32_t splat, int A, int B, int C, int D>
std::uint32_t encode(ImmChoice<A, B, C, D> v)
{
static_assert(std::popcount(splat) == 2);
return pdep<splat>(v.m_encoded);
}
// Encodes an SOffset; the field must hold exactly (size - align) bits.
template<std::uint32_t splat, std::size_t size, std::size_t align>
std::uint32_t encode(SOffset<size, align> v)
{
    constexpr auto field_width = std::popcount(splat);
    static_assert(field_width == size - align);
    return pdep<splat>(v.m_encoded);
}

// Encodes a POffset; the field must hold exactly (size - align) bits.
template<std::uint32_t splat, std::size_t size, std::size_t align>
std::uint32_t encode(POffset<size, align> v)
{
    constexpr auto field_width = std::popcount(splat);
    static_assert(field_width == size - align);
    return pdep<splat>(v.m_encoded);
}
// Encodes a raw 32-bit value without any width check.
template<std::uint32_t splat>
std::uint32_t encode(std::uint32_t v)
{
    const std::uint32_t deposited = pdep<splat>(v);
    return deposited;
}

// Encodes a register list by encoding its base (first) element only.
template<std::uint32_t splat, typename T, size_t N>
std::uint32_t encode(List<T, N> v)
{
    const T first = v.m_base;
    return encode<splat>(first);
}
#undef OAKNUT_STD_ENCODE
// Replaces the AddSubExt::LSL placeholder with the concrete extend that matches the
// register type passed as the (unused) second argument:
// UXTX for XRegSp / XReg, UXTW for WRegWsp / WReg. Any other extend is left untouched.
void addsubext_lsl_correction(AddSubExt& ext, XRegSp)
{
    ext = (ext == AddSubExt::LSL) ? AddSubExt::UXTX : ext;
}

void addsubext_lsl_correction(AddSubExt& ext, WRegWsp)
{
    ext = (ext == AddSubExt::LSL) ? AddSubExt::UXTW : ext;
}

void addsubext_lsl_correction(AddSubExt& ext, XReg)
{
    ext = (ext == AddSubExt::LSL) ? AddSubExt::UXTX : ext;
}

void addsubext_lsl_correction(AddSubExt& ext, WReg)
{
    ext = (ext == AddSubExt::LSL) ? AddSubExt::UXTW : ext;
}
// Checks that the extend kind agrees with the width of rm:
// a 64-bit rm requires the two low bits of the extend to be 0b11,
// a 32-bit rm requires them to be anything else.
// Throws "invalid AddSubExt choice for rm size" on any other combination.
void addsubext_verify_reg_size(AddSubExt ext, RReg rm)
{
    const bool low_bits_all_set = (static_cast<int>(ext) & 0b011) == 0b011;
    const unsigned rm_bits = rm.bitsize();

    const bool acceptable = (rm_bits == 32 && !low_bits_all_set)
                         || (rm_bits == 64 && low_bits_all_set);
    if (!acceptable)
        throw "invalid AddSubExt choice for rm size";
}
// Checks that the index extend agrees with the width of rm:
// the lowest bit of the extend must be 0 for a 32-bit rm and 1 for a 64-bit rm.
// Throws "invalid IndexExt choice for rm size" on any other combination.
void indexext_verify_reg_size(IndexExt ext, RReg rm)
{
    const int low_bit = static_cast<int>(ext) & 1;
    const unsigned rm_bits = rm.bitsize();

    const bool acceptable = (rm_bits == 32 && low_bit == 0)
                         || (rm_bits == 64 && low_bit == 1);
    if (!acceptable)
        throw "invalid IndexExt choice for rm size";
}
// Rejects bit numbers of 32 or above when the tested register is 32 bits wide.
void tbz_verify_reg_size(RReg rt, Imm<6> imm)
{
    const bool is_32bit_reg = rt.bitsize() == 32;
    if (!is_32bit_reg)
        return;
    if (imm.value() >= 32)
        throw "invalid imm choice for rt size";
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,242 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
namespace oaknut {
// Empty tag types; presumably used to select post-/pre-indexed addressing overloads
// (their users are not visible here - confirm against the mnemonic definitions).
struct PostIndexed {};
struct PreIndexed {};
// Single-enumerator enums, each providing one named symbol (LSL / MSL).
enum class LslSymbol {
LSL,
};
enum class MslSymbol {
MSL,
};
// Condition codes. Enumerators take consecutive values from EQ = 0 through NV = 15;
// HS and LO are aliases for CS and CC respectively.
enum class Cond {
EQ,
NE,
CS,
CC,
MI,
PL,
VS,
VC,
HI,
LS,
GE,
LT,
GT,
LE,
AL,
NV,
HS = CS,
LO = CC,
};
// Returns the paired condition obtained by flipping the lowest bit of the
// condition's numeric value (EQ <-> NE, CS <-> CC, ..., AL <-> NV).
constexpr Cond invert(Cond c)
{
    const unsigned flipped = static_cast<unsigned>(c) ^ 1u;
    return static_cast<Cond>(flipped);
}
// Extend kinds for add/subtract (extended register) operands.
// UXTB..SXTX take the values 0..7; LSL (value 8) is a placeholder that does not fit
// the 3-bit field and is replaced by UXTW or UXTX before encoding
// (see addsubext_lsl_correction in the encode helpers).
enum class AddSubExt {
UXTB,
UXTH,
UXTW,
UXTX,
SXTB,
SXTH,
SXTW,
SXTX,
LSL, // UXTW (32-bit) or UXTX (64-bit)
};
// Extend kinds for register-indexed addressing; values are the 3-bit field encodings.
// The lowest bit is 0 for the *W variants and 1 for LSL / SXTX.
enum class IndexExt {
UXTW = 0b010,
LSL = 0b011,
SXTW = 0b110,
SXTX = 0b111,
};
// Shift kinds accepted by add/subtract (shifted register): values 0..2.
enum class AddSubShift {
LSL,
LSR,
ASR,
};
// Shift kinds accepted by logical (shifted register): values 0..3 (adds ROR).
enum class LogShift {
LSL,
LSR,
ASR,
ROR,
};
// PSTATE field selectors. Each value is a 6-bit encoding written as two 3-bit groups;
// presumably op1:op2 of MSR (immediate) - confirm against the Arm ARM.
enum class PstateField {
UAO = 0b000'011, // ARMv8.2-UAO
PAN = 0b000'100, // ARMv8.1-PAN
SPSel = 0b000'101,
DIT = 0b011'010, // ARMv8.4-DIT
DAIFSet = 0b011'110,
DAIFClr = 0b011'111,
};
// System register identifiers. No named enumerators are defined; the value is encoded
// directly as a 15-bit field by the encode helpers.
enum class SystemReg {
};
// Address-translation (AT) operations. Each value is a 7-bit encoding written as
// 3'1'3 bit groups (presumably op1 ' CRm<0> ' op2 - confirm against the Arm ARM).
enum class AtOp {
S1E1R = 0b000'0'000,
S1E1W = 0b000'0'001,
S1E0R = 0b000'0'010,
S1E0W = 0b000'0'011,
S1E1RP = 0b000'1'000, // ARMv8.2-ATS1E1
S1E1WP = 0b000'1'001, // ARMv8.2-ATS1E1
S1E2R = 0b100'0'000,
S1E2W = 0b100'0'001,
S12E1R = 0b100'0'100,
S12E1W = 0b100'0'101,
S12E0R = 0b100'0'110,
S12E0W = 0b100'0'111,
S1E3R = 0b110'0'000,
S1E3W = 0b110'0'001,
};
// Barrier options; each value is the 4-bit field encoding.
// Not every 4-bit value has a named enumerator here.
enum class BarrierOp {
SY = 0b1111,
ST = 0b1110,
LD = 0b1101,
ISH = 0b1011,
ISHST = 0b1010,
ISHLD = 0b1001,
NSH = 0b0111,
NSHST = 0b0110,
NSHLD = 0b0101,
OSH = 0b0011,
OSHST = 0b0010,
OSHLD = 0b0001,
};
// Data-cache (DC) operations. Each value is a 10-bit encoding written as 3'4'3 bit
// groups (presumably op1 ' CRm ' op2 - confirm against the Arm ARM).
enum class DcOp {
IVAC = 0b000'0110'001,
ISW = 0b000'0110'010,
CSW = 0b000'1010'010,
CISW = 0b000'1110'010,
ZVA = 0b011'0100'001,
CVAC = 0b011'1010'001,
CVAU = 0b011'1011'001,
CVAP = 0b011'1100'001, // ARMv8.2-DCPoP
CIVAC = 0b011'1110'001,
};
// Instruction-cache (IC) operations; same 10-bit 3'4'3 layout as DcOp.
enum class IcOp {
IALLUIS = 0b000'0001'000,
IALLU = 0b000'0101'000,
IVAU = 0b011'0101'001,
};
// Prefetch operations. Each value is a 5-bit encoding written as 2'2'1 bit groups:
// as the names show, the first group selects PLD/PLI/PST, the second the cache
// level (L1..L3 as 00..10), and the last bit KEEP (0) versus STRM (1).
enum class PrfOp {
PLDL1KEEP = 0b00'00'0,
PLDL1STRM = 0b00'00'1,
PLDL2KEEP = 0b00'01'0,
PLDL2STRM = 0b00'01'1,
PLDL3KEEP = 0b00'10'0,
PLDL3STRM = 0b00'10'1,
PLIL1KEEP = 0b01'00'0,
PLIL1STRM = 0b01'00'1,
PLIL2KEEP = 0b01'01'0,
PLIL2STRM = 0b01'01'1,
PLIL3KEEP = 0b01'10'0,
PLIL3STRM = 0b01'10'1,
PSTL1KEEP = 0b10'00'0,
PSTL1STRM = 0b10'00'1,
PSTL2KEEP = 0b10'01'0,
PSTL2STRM = 0b10'01'1,
PSTL3KEEP = 0b10'10'0,
PSTL3STRM = 0b10'10'1,
};
// TLB-invalidate (TLBI) operations. Each value is a 10-bit encoding written as 3'4'3
// bit groups (presumably op1 ' CRm ' op2 - confirm against the Arm ARM).
// Entries are ordered by numeric value; trailing comments name the architecture
// extension that introduced the operation.
enum class TlbiOp {
VMALLE1OS = 0b000'0001'000, // ARMv8.4-TLBI
VAE1OS = 0b000'0001'001, // ARMv8.4-TLBI
ASIDE1OS = 0b000'0001'010, // ARMv8.4-TLBI
VAAE1OS = 0b000'0001'011, // ARMv8.4-TLBI
VALE1OS = 0b000'0001'101, // ARMv8.4-TLBI
VAALE1OS = 0b000'0001'111, // ARMv8.4-TLBI
RVAE1IS = 0b000'0010'001, // ARMv8.4-TLBI
RVAAE1IS = 0b000'0010'011, // ARMv8.4-TLBI
RVALE1IS = 0b000'0010'101, // ARMv8.4-TLBI
RVAALE1IS = 0b000'0010'111, // ARMv8.4-TLBI
VMALLE1IS = 0b000'0011'000,
VAE1IS = 0b000'0011'001,
ASIDE1IS = 0b000'0011'010,
VAAE1IS = 0b000'0011'011,
VALE1IS = 0b000'0011'101,
VAALE1IS = 0b000'0011'111,
RVAE1OS = 0b000'0101'001, // ARMv8.4-TLBI
RVAAE1OS = 0b000'0101'011, // ARMv8.4-TLBI
RVALE1OS = 0b000'0101'101, // ARMv8.4-TLBI
RVAALE1OS = 0b000'0101'111, // ARMv8.4-TLBI
RVAE1 = 0b000'0110'001, // ARMv8.4-TLBI
RVAAE1 = 0b000'0110'011, // ARMv8.4-TLBI
RVALE1 = 0b000'0110'101, // ARMv8.4-TLBI
RVAALE1 = 0b000'0110'111, // ARMv8.4-TLBI
VMALLE1 = 0b000'0111'000,
VAE1 = 0b000'0111'001,
ASIDE1 = 0b000'0111'010,
VAAE1 = 0b000'0111'011,
VALE1 = 0b000'0111'101,
VAALE1 = 0b000'0111'111,
IPAS2E1IS = 0b100'0000'001,
RIPAS2E1IS = 0b100'0000'010, // ARMv8.4-TLBI
IPAS2LE1IS = 0b100'0000'101,
RIPAS2LE1IS = 0b100'0000'110, // ARMv8.4-TLBI
ALLE2OS = 0b100'0001'000, // ARMv8.4-TLBI
VAE2OS = 0b100'0001'001, // ARMv8.4-TLBI
ALLE1OS = 0b100'0001'100, // ARMv8.4-TLBI
VALE2OS = 0b100'0001'101, // ARMv8.4-TLBI
VMALLS12E1OS = 0b100'0001'110, // ARMv8.4-TLBI
RVAE2IS = 0b100'0010'001, // ARMv8.4-TLBI
RVALE2IS = 0b100'0010'101, // ARMv8.4-TLBI
ALLE2IS = 0b100'0011'000,
VAE2IS = 0b100'0011'001,
ALLE1IS = 0b100'0011'100,
VALE2IS = 0b100'0011'101,
VMALLS12E1IS = 0b100'0011'110,
IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI
IPAS2E1 = 0b100'0100'001,
RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI
RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI
IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI
IPAS2LE1 = 0b100'0100'101,
RIPAS2LE1 = 0b100'0100'110, // ARMv8.4-TLBI
RIPAS2LE1OS = 0b100'0100'111, // ARMv8.4-TLBI
RVAE2OS = 0b100'0101'001, // ARMv8.4-TLBI
RVALE2OS = 0b100'0101'101, // ARMv8.4-TLBI
RVAE2 = 0b100'0110'001, // ARMv8.4-TLBI
RVALE2 = 0b100'0110'101, // ARMv8.4-TLBI
ALLE2 = 0b100'0111'000,
VAE2 = 0b100'0111'001,
ALLE1 = 0b100'0111'100,
VALE2 = 0b100'0111'101,
VMALLS12E1 = 0b100'0111'110,
ALLE3OS = 0b110'0001'000, // ARMv8.4-TLBI
VAE3OS = 0b110'0001'001, // ARMv8.4-TLBI
VALE3OS = 0b110'0001'101, // ARMv8.4-TLBI
RVAE3IS = 0b110'0010'001, // ARMv8.4-TLBI
RVALE3IS = 0b110'0010'101, // ARMv8.4-TLBI
ALLE3IS = 0b110'0011'000,
VAE3IS = 0b110'0011'001,
VALE3IS = 0b110'0011'101,
RVAE3OS = 0b110'0101'001, // ARMv8.4-TLBI
RVALE3OS = 0b110'0101'101, // ARMv8.4-TLBI
RVAE3 = 0b110'0110'001, // ARMv8.4-TLBI
RVALE3 = 0b110'0110'101, // ARMv8.4-TLBI
ALLE3 = 0b110'0111'000,
VAE3 = 0b110'0111'001,
VALE3 = 0b110'0111'101,
};
} // namespace oaknut

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,317 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <bit>
#include <compare>
#include <cstddef>
#include <cstdint>
#include <optional>
namespace oaknut {
// Unsigned immediate that is guaranteed to fit in `bit_size_` bits (1..32).
//
// Construction from a std::uint32_t is implicit and throws "outsized Imm value"
// when the value has bits set above bit_size_.
template<std::size_t bit_size_>
struct Imm {
public:
    static_assert(bit_size_ != 0 && bit_size_ <= 32, "Invalid bit_size");

    static constexpr std::size_t bit_size = bit_size_;
    // Computed in 64 bits so that bit_size == 32 does not shift a 32-bit value by its
    // full width (which is undefined and not a constant expression).
    static constexpr std::uint32_t mask = static_cast<std::uint32_t>((std::uint64_t{1} << bit_size) - 1);

    constexpr /* implicit */ Imm(std::uint32_t value_)
        : m_value(value_)
    {
        if (!is_valid(value_))
            throw "outsized Imm value";
    }

    constexpr auto operator<=>(const Imm& other) const { return m_value <=> other.m_value; }
    // Compares against the raw integer directly; going through Imm{other} would throw
    // for operands that do not fit in bit_size bits instead of ordering them.
    constexpr auto operator<=>(std::uint32_t other) const { return m_value <=> other; }

    constexpr std::uint32_t value() const { return m_value; }

    // True when value_ has no bits set outside the low bit_size bits.
    // constexpr so that the constexpr constructor can be evaluated at compile time.
    static constexpr bool is_valid(std::uint32_t value_)
    {
        return ((value_ & mask) == value_);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_value;
};
// Selects whether a 12-bit add/sub immediate is used as-is or shifted left by 12.
enum class AddSubImmShift {
    SHL_0,
    SHL_12,
};

// 12-bit add/subtract immediate with an optional "shifted left by 12" flag.
// Stored as: bit 12 = shift flag, bits 0..11 = immediate.
struct AddSubImm {
public:
    // Explicit form: value_ must fit in 12 bits, otherwise "invalid AddSubImm" is thrown.
    constexpr AddSubImm(std::uint32_t value_, AddSubImmShift shift_)
        : m_encoded(value_ | (shift_ == AddSubImmShift::SHL_12 ? 0x1000u : 0u))
    {
        const bool fits_12_bits = (value_ & 0xFFF) == value_;
        if (!fits_12_bits)
            throw "invalid AddSubImm";
    }

    // Implicit form: accepts a value that occupies only bits 0..11 or only bits 12..23
    // and picks the shift automatically; anything else throws "invalid AddSubImm".
    constexpr /* implicit */ AddSubImm(std::uint64_t value_)
    {
        const bool unshifted = (value_ & 0xFFF) == value_;
        const bool shifted = (value_ & 0xFFF000) == value_;

        if (unshifted) {
            m_encoded = static_cast<std::uint32_t>(value_);
            return;
        }
        if (shifted) {
            m_encoded = static_cast<std::uint32_t>((value_ >> 12) | (1 << 12));
            return;
        }
        throw "invalid AddSubImm";
    }

    // True when the implicit constructor would accept value_.
    static constexpr bool is_valid(std::uint64_t value_)
    {
        const bool unshifted = (value_ & 0xFFF) == value_;
        const bool shifted = (value_ & 0xFFF000) == value_;
        return unshifted || shifted;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
// Left-shift applied to a 16-bit move-wide immediate; the enumerator value (0..3)
// is the number of 16-bit steps.
enum class MovImm16Shift {
    SHL_0,
    SHL_16,
    SHL_32,
    SHL_48,
};

// 16-bit immediate plus a 16-bit-granular shift.
// Stored as: bits 16..17 = shift selector, bits 0..15 = immediate.
struct MovImm16 {
public:
    // Explicit form. constexpr like the constructors of the other immediate types in
    // this header, so that it can be used in constant expressions as well.
    constexpr MovImm16(std::uint16_t value_, MovImm16Shift shift_)
        : m_encoded(static_cast<std::uint32_t>(value_) | (static_cast<std::uint32_t>(shift_) << 16))
    {}

    // Implicit form: accepts a 64-bit value in which at most one aligned 16-bit chunk
    // is non-zero and derives the shift from that chunk's position.
    // Zero encodes as 0; any other value throws "invalid MovImm16".
    constexpr /* implicit */ MovImm16(std::uint64_t value_)
    {
        std::uint32_t shift = 0;
        while (value_ != 0) {
            const std::uint32_t lsw = static_cast<std::uint16_t>(value_ & 0xFFFF);
            if (value_ == lsw) {
                // Everything that is left fits in the current 16-bit chunk.
                m_encoded = lsw | (shift << 16);
                return;
            } else if (lsw != 0) {
                // Non-zero bits in this chunk AND above it: not representable.
                throw "invalid MovImm16";
            }
            value_ >>= 16;
            shift++;
        }
    }

    // True when the implicit constructor would accept value_.
    static constexpr bool is_valid(std::uint64_t value_)
    {
        return ((value_ & 0xFFFF) == value_) || ((value_ & 0xFFFF0000) == value_) || ((value_ & 0xFFFF00000000) == value_) || ((value_ & 0xFFFF000000000000) == value_);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded = 0;
};
namespace detail {
// Tries to express `value` as a repeating-pattern bitmask immediate.
// Returns std::nullopt for all-zeros, all-ones, and any value that is not a
// repetition of a rotated run of ones; otherwise returns a 13-bit field laid out as
//   bit 12     : N
//   bits 6..11 : R (rotation)
//   bits 0..5  : low six bits of S (element-size marker combined with ones - 1)
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint64_t value)
{
if (value == 0 || (~value) == 0)
return std::nullopt;
// value & (value + 1) clears the trailing run of ones; the lowest bit still set
// gives the rotation that brings a run of ones down to bit 0.
const std::size_t rotation = std::countr_zero(value & (value + 1));
const std::uint64_t rot_value = std::rotr(value, rotation);
// After rotating, the same trick yields the distance to the next run of ones, i.e.
// the candidate element size (countr_zero returns 64 when there is no further run).
const std::size_t esize = std::countr_zero(rot_value & (rot_value + 1));
const std::size_t ones = std::countr_one(rot_value);
// The pattern must repeat with period esize, otherwise it is not encodable.
if (std::rotr(value, esize) != value)
return std::nullopt;
const std::uint32_t S = ((-esize) << 1) | (ones - 1);
const std::uint32_t R = (esize - rotation) & (esize - 1);
// N is the inverse of bit 6 of S.
const std::uint32_t N = (~S >> 6) & 1;
return static_cast<std::uint32_t>((S & 0b111111) | (R << 6) | (N << 12));
}
// 32-bit variant: replicates `value` into both halves of a 64-bit word, encodes that,
// and rejects any result that needs bit 12 (N) set, since only the low 12 bits are
// available for a 32-bit operation.
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint32_t value)
{
    const std::uint64_t widened = static_cast<std::uint64_t>(value);
    const std::uint64_t replicated = (widened << 32) | widened;

    const std::optional<std::uint32_t> encoded = encode_bit_imm(replicated);
    if (!encoded.has_value())
        return encoded;

    constexpr std::uint32_t twelve_bit_mask = 0b0'111111'111111;
    const bool fits_12_bits = (*encoded & twelve_bit_mask) == *encoded;
    if (!fits_12_bits)
        return std::nullopt;
    return encoded;
}
} // namespace detail
// Bitmask immediate for 32-bit logical operations.
// Stored as a 12-bit field: bits 6..11 = immr, bits 0..5 = imms.
struct BitImm32 {
public:
    // Builds the field from explicit imms / immr values.
    // The layout matches what detail::encode_bit_imm() returns (S, i.e. imms, in the
    // low six bits and R, i.e. immr, in bits 6..11) and what LslShift stores, so both
    // constructors of this type produce the same encoding for the same immediate.
    // The previous packing had the two fields swapped relative to that layout.
    constexpr BitImm32(Imm<6> imms, Imm<6> immr)
        : m_encoded((immr.value() << 6) | imms.value())
    {}

    // Derives the encoding from the immediate's value.
    // Throws "invalid BitImm32" when the value is not an encodable bitmask or would
    // need the N bit (bit 12), which does not exist for 32-bit operations.
    constexpr /* implicit */ BitImm32(std::uint32_t value)
    {
        const auto encoded = detail::encode_bit_imm(value);
        if (!encoded || (*encoded & 0x1000) != 0)
            throw "invalid BitImm32";
        m_encoded = *encoded;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
// Bitmask immediate for 64-bit logical operations.
// Stored as a 13-bit field: bit 12 = N, bits 6..11 = immr, bits 0..5 = imms.
struct BitImm64 {
public:
    // Builds the field from explicit N / imms / immr values.
    // The layout matches what detail::encode_bit_imm() returns (S, i.e. imms, in the
    // low six bits, R, i.e. immr, in bits 6..11, N in bit 12), so both constructors of
    // this type produce the same encoding for the same immediate.
    // The previous packing had imms and immr swapped relative to that layout.
    constexpr BitImm64(bool N, Imm<6> imms, Imm<6> immr)
        : m_encoded((N ? 1 << 12 : 0) | (immr.value() << 6) | imms.value())
    {}

    // Derives the encoding from the immediate's value.
    // Throws "invalid BitImm64" when the value is not an encodable bitmask.
    constexpr /* implicit */ BitImm64(std::uint64_t value)
    {
        const auto encoded = detail::encode_bit_imm(value);
        if (!encoded)
            throw "invalid BitImm64";
        m_encoded = *encoded;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
// 8-bit packed floating-point immediate.
// Stored as: bit 7 = sign, bits 4..6 = exp, bits 0..3 = mantissa.
struct FImm8 {
public:
    // Takes an already-packed 8-bit encoding.
    constexpr explicit FImm8(std::uint8_t encoded)
        : m_encoded(encoded)
    {}

    // Packs the three components into the 8-bit encoding.
    constexpr FImm8(bool sign, Imm<3> exp, Imm<4> mantissa)
        : m_encoded((sign ? 0x80u : 0u) | (exp.value() << 4) | mantissa.value())
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};

// Opaque 8-bit immediate that is emitted exactly as given.
struct RepImm {
public:
    constexpr explicit RepImm(std::uint8_t encoded)
        : m_encoded(encoded)
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
// Operand that must equal the compile-time constant A.
// Carries no state; construction throws "invalid ImmConst" for any other value.
template<int A>
struct ImmConst {
    constexpr /* implicit */ ImmConst(int value)
    {
        const bool matches = (value == A);
        if (!matches)
            throw "invalid ImmConst";
    }
};

// Operand that must be floating-point zero.
// Carries no state; construction throws "invalid ImmConstFZero" for any non-zero value.
struct ImmConstFZero {
    constexpr /* implicit */ ImmConstFZero(double value)
    {
        const bool is_zero = !(value != 0);
        if (!is_zero)
            throw "invalid ImmConstFZero";
    }
};
// Operand restricted to a fixed set of integer values; the stored encoding is the
// zero-based position of the matching value in the template argument list.
// Only the two-value and four-value forms are defined.
template<int...>
struct ImmChoice;

// Two-way choice: A encodes as 0, B as 1; anything else throws "invalid ImmChoice".
template<int A, int B>
struct ImmChoice<A, B> {
    constexpr /* implicit */ ImmChoice(int value)
    {
        constexpr int candidates[] = {A, B};
        for (std::uint32_t position = 0; position < 2; ++position) {
            if (value == candidates[position]) {
                m_encoded = position;
                return;
            }
        }
        throw "invalid ImmChoice";
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};

// Four-way choice: A, B, C, D encode as 0, 1, 2, 3 (first match wins);
// anything else throws "invalid ImmChoice".
template<int A, int B, int C, int D>
struct ImmChoice<A, B, C, D> {
    constexpr /* implicit */ ImmChoice(int value)
    {
        constexpr int candidates[] = {A, B, C, D};
        for (std::uint32_t position = 0; position < 4; ++position) {
            if (value == candidates[position]) {
                m_encoded = position;
                return;
            }
        }
        throw "invalid ImmChoice";
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
// Unsigned operand restricted to the inclusive range [Start, End].
// Construction throws "invalid ImmRange" for values outside that range.
template<unsigned Start, unsigned End>
struct ImmRange {
    constexpr /* implicit */ ImmRange(unsigned value_)
        : m_value(value_)
    {
        const bool within = (Start <= value_) && (value_ <= End);
        if (!within)
            throw "invalid ImmRange";
    }

    constexpr unsigned value() const { return m_value; }

private:
    unsigned m_value;
};
// Left-shift amount in [0, max_value), pre-packed into a 12-bit field:
//   bits 6..11 : (-amount) masked to max_value - 1
//   bits 0..5  : max_value - amount - 1
// Construction throws "LslShift out of range" when amount >= max_value.
template<std::size_t max_value>
struct LslShift {
    constexpr /* implicit */ LslShift(std::size_t amount)
        : m_encoded(pack(amount))
    {
        const bool in_range = amount < max_value;
        if (!in_range)
            throw "LslShift out of range";
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    // Pure arithmetic; evaluated before the range check, exactly as in-initializer.
    static constexpr std::uint32_t pack(std::size_t amount)
    {
        const std::size_t upper = (std::size_t{0} - amount) & (max_value - 1);
        const std::size_t lower = max_value - amount - 1;
        return static_cast<std::uint32_t>((upper << 6) | lower);
    }

    std::uint32_t m_encoded;
};
} // namespace oaknut

View file

@ -0,0 +1,80 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <tuple>
#include <type_traits>
namespace oaknut {
struct Elem;
template<typename>
struct ElemSelector;
struct VRegArranged;
namespace detail {
// Type trait: true only for specialisations of ElemSelector<E>.
template<typename>
struct is_instance_of_ElemSelector : std::false_type {};
template<typename E>
struct is_instance_of_ElemSelector<ElemSelector<E>> : std::true_type {};
// Convenience variable template for the trait above.
template<class T>
constexpr bool is_instance_of_ElemSelector_v = is_instance_of_ElemSelector<T>::value;
// Tag that selects List's private constructor, which stores a base element
// without running the consecutive-register verification.
struct BaseOnlyTag {};
} // namespace detail
// A list of N operands of type T that must name consecutive registers.
// Only the first element (m_base) is stored; the remaining arguments are used solely
// to validate the list at construction time.
template<typename T, std::size_t N>
struct List {
// Takes exactly N arguments, all of type T (both enforced by static_assert).
// T must derive from VRegArranged or Elem, or be an ElemSelector specialisation.
// Throws "invalid List" when verify() rejects the arguments.
template<typename... U>
constexpr explicit List(U... args)
: m_base(std::get<0>(std::tie(args...)))
{
static_assert((std::is_same_v<T, U> && ...));
static_assert(sizeof...(args) == N);
static_assert(std::is_base_of_v<VRegArranged, T> || std::is_base_of_v<Elem, T> || detail::is_instance_of_ElemSelector_v<T>);
if (!verify(std::index_sequence_for<U...>{}, args...))
throw "invalid List";
}
// Indexes the base element and wraps the result in a List of the same length;
// the new list is built through the private constructor, so it is not re-verified.
constexpr auto operator[](unsigned elem_index) const
{
using S = decltype(m_base[elem_index]);
return List<S, N>(detail::BaseOnlyTag{}, m_base[elem_index]);
}
private:
template<typename>
friend class BasicCodeGenerator;
template<typename, std::size_t>
friend struct List;
constexpr explicit List(detail::BaseOnlyTag, T base_)
: m_base(base_)
{}
// Checks that the i-th argument names register (base + i) modulo 32.
// For Elem-derived types every argument must additionally share the base's
// element index.
template<typename... U, std::size_t... indexes>
constexpr bool verify(std::index_sequence<indexes...>, U... args)
{
if constexpr (std::is_base_of_v<VRegArranged, T>) {
return (((m_base.index() + indexes) % 32 == static_cast<std::size_t>(args.index())) && ...);
} else if constexpr (std::is_base_of_v<Elem, T>) {
return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index()) && m_base.elem_index() == args.elem_index()) && ...);
} else {
return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index())) && ...);
}
}
T m_base;
};
// Deduction guide: List{a, b, c} deduces T as the common type of the arguments and
// N as their count.
template<typename... U>
List(U...) -> List<std::common_type_t<U...>, sizeof...(U)>;
} // namespace oaknut

View file

@ -0,0 +1,19 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
namespace oaknut {
// A single name that converts implicitly to several differently-typed constants.
// MultiTypedName<V1, V2, ...> provides one conversion operator per value, each
// returning that value as its own type; the operators are accumulated through
// recursive inheritance, terminating at the empty specialisation.
template<auto... Vs>
struct MultiTypedName;
template<>
struct MultiTypedName<> {};
template<auto V, auto... Vs>
struct MultiTypedName<V, Vs...> : public MultiTypedName<Vs...> {
constexpr operator decltype(V)() const { return V; }
};
} // namespace oaknut

View file

@ -0,0 +1,129 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <cstdint>
#include <variant>
namespace oaknut {
struct Label;
namespace detail {
// Returns a mask with every bit at position >= size set (the low `size` bits clear).
// size >= 64 yields 0 instead of shifting by the full word width, which is undefined.
constexpr std::uint64_t inverse_mask_from_size(std::size_t size)
{
    if (size >= 64)
        return 0;
    return (~std::uint64_t{0}) << size;
}

// Returns a mask with the low `size` bits set.
// size == 0 yields 0 and size >= 64 yields all ones; both would otherwise require an
// out-of-range shift.
constexpr std::uint64_t mask_from_size(std::size_t size)
{
    if (size == 0)
        return 0;
    if (size >= 64)
        return ~std::uint64_t{0};
    return (~std::uint64_t{0}) >> (64 - size);
}

// Sign-extends the low `bit_count` bits of value to 64 bits.
template<std::size_t bit_count>
constexpr std::uint64_t sign_extend(std::uint64_t value)
{
    static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
    static_assert(bit_count <= 64, "cannot sign-extend from more than 64 bits");
    constexpr std::size_t shift_amount = 64 - bit_count;
    // Move the field's top bit into bit 63, then shift back arithmetically.
    return static_cast<std::uint64_t>(static_cast<std::int64_t>(value << shift_amount) >> shift_amount);
}
} // namespace detail
// PC-relative offset operand: `bitsize` is the width of the signed byte offset and
// `alignment` the number of low bits that must be zero (and are dropped on encode).
// Holds either an already-encoded offset, a Label to be resolved, or an absolute pointer.
template<std::size_t bitsize, std::size_t alignment>
struct AddrOffset {
    // Immediate byte offset; validated and encoded right away.
    AddrOffset(std::ptrdiff_t diff)
        : m_payload(encode(diff))
    {}

    // Label whose address may not be known yet.
    AddrOffset(Label& label)
        : m_payload(&label)
    {}

    // Absolute target address.
    AddrOffset(void* ptr)
        : m_payload(ptr)
    {}

    // Converts a byte offset into the field value.
    // Throws "out of range" when diff does not survive sign-extension from `bitsize`
    // bits, and "misalignment" when any of the low `alignment` bits is set.
    static std::uint32_t encode(std::ptrdiff_t diff)
    {
        const auto raw = static_cast<std::uint64_t>(diff);

        const bool representable = detail::sign_extend<bitsize>(raw) == raw;
        if (!representable)
            throw "out of range";

        const bool aligned = (raw & detail::inverse_mask_from_size(alignment)) == raw;
        if (!aligned)
            throw "misalignment";

        const std::uint64_t field = raw & detail::mask_from_size(bitsize);
        return static_cast<std::uint32_t>(field >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::variant<std::uint32_t, Label*, void*> m_payload;
};
// Page-relative offset operand (4 KiB pages: addresses are shifted right by 12).
// Holds either a Label to be resolved or an absolute pointer.
template<std::size_t bitsize>
struct PageOffset {
    // Absolute target address.
    PageOffset(void* ptr)
        : m_payload(ptr)
    {}

    // Label whose address may not be known yet.
    PageOffset(Label& label)
        : m_payload(&label)
    {}

    // Returns the signed page distance from current_addr to target, truncated to
    // `bitsize` bits. Throws "out of range" when the distance does not survive
    // sign-extension from `bitsize` bits.
    static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target)
    {
        const std::int64_t target_page = static_cast<std::int64_t>(target) >> 12;
        const std::int64_t current_page = static_cast<std::int64_t>(current_addr) >> 12;
        const std::int64_t page_diff = target_page - current_page;

        if (detail::sign_extend<bitsize>(page_diff) != page_diff)
            throw "out of range";

        const auto truncated = page_diff & detail::mask_from_size(bitsize);
        return static_cast<std::uint32_t>(truncated);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::variant<Label*, void*> m_payload;
};
// Signed byte offset of `bitsize` bits whose low `alignment` bits must be zero.
// Throws "out of range" / "misalignment" on construction when those rules are violated.
template<std::size_t bitsize, std::size_t alignment>
struct SOffset {
    SOffset(std::int64_t offset)
    {
        const auto raw = static_cast<std::uint64_t>(offset);

        const bool representable = detail::sign_extend<bitsize>(raw) == raw;
        if (!representable)
            throw "out of range";

        const bool aligned = (raw & detail::inverse_mask_from_size(alignment)) == raw;
        if (!aligned)
            throw "misalignment";

        const std::uint64_t field = raw & detail::mask_from_size(bitsize);
        m_encoded = static_cast<std::uint32_t>(field >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};

// Non-negative byte offset of at most `bitsize` bits whose low `alignment` bits must
// be zero. Negative inputs are rejected as "out of range" because they become huge
// values once reinterpreted as unsigned.
template<std::size_t bitsize, std::size_t alignment>
struct POffset {
    POffset(std::int64_t offset)
    {
        const auto raw = static_cast<std::uint64_t>(offset);

        const bool representable = !(raw > detail::mask_from_size(bitsize));
        if (!representable)
            throw "out of range";

        const bool aligned = (raw & detail::inverse_mask_from_size(alignment)) == raw;
        if (!aligned)
            throw "misalignment";

        const std::uint64_t field = raw & detail::mask_from_size(bitsize);
        m_encoded = static_cast<std::uint32_t>(field >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
} // namespace oaknut

View file

@ -0,0 +1,441 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace oaknut {
struct Reg;
struct RReg;
struct ZrReg;
struct WzrReg;
struct XReg;
struct WReg;
struct SpReg;
struct WspReg;
struct XRegSp;
struct XRegWsp;
struct VReg;
struct VRegArranged;
struct BReg;
struct HReg;
struct SReg;
struct DReg;
struct QReg;
struct VReg_8B;
struct VReg_4H;
struct VReg_2S;
struct VReg_1D;
struct VReg_16B;
struct VReg_8H;
struct VReg_4S;
struct VReg_2D;
struct VReg_1Q;
struct VRegSelector;
template<typename Elem>
struct ElemSelector;
struct BElem;
struct HElem;
struct SElem;
struct DElem;
// Common base of every register operand: an index (-1..31), a power-of-two bit size,
// and a flag that distinguishes vector registers from general-purpose ones.
struct Reg {
    constexpr explicit Reg(bool is_vector_, unsigned bitsize_, int index_)
        : m_index(index_)
        , m_bitsize(bitsize_)
        , m_is_vector(is_vector_)
    {
        // Debug-only sanity checks; nothing is enforced when NDEBUG is defined.
        assert(-1 <= index_ && index_ <= 31);
        assert(bitsize_ != 0 && (bitsize_ & (bitsize_ - 1)) == 0 && "Bitsize must be a power of two");
    }

    constexpr int index() const
    {
        return m_index;
    }

    constexpr unsigned bitsize() const
    {
        return m_bitsize;
    }

    constexpr bool is_vector() const
    {
        return m_is_vector;
    }

private:
    int m_index : 8;
    unsigned m_bitsize : 8;
    bool m_is_vector;
};
// General-purpose register of 32 or 64 bits (non-vector).
struct RReg : public Reg {
constexpr explicit RReg(unsigned bitsize_, int index_)
: Reg(false, bitsize_, index_)
{
assert(bitsize_ == 32 || bitsize_ == 64);
}
// Re-type this register as a 64-bit / 32-bit register with the same index.
// Both throw when the index is -1 (the stack-pointer marker).
XReg toX() const;
WReg toW() const;
template<typename Policy>
friend class BasicCodeGenerator;
};
// 64-bit zero register: index 31.
struct ZrReg : public RReg {
constexpr explicit ZrReg()
: RReg(64, 31) {}
};
// 32-bit zero register: index 31.
struct WzrReg : public RReg {
constexpr explicit WzrReg()
: RReg(32, 31) {}
};
// 64-bit general-purpose register; implicitly constructible from ZrReg.
struct XReg : public RReg {
constexpr explicit XReg(int index_)
: RReg(64, index_) {}
constexpr /* implicit */ XReg(ZrReg)
: RReg(64, 31) {}
template<typename Policy>
friend class BasicCodeGenerator;
};
// 32-bit general-purpose register; implicitly constructible from WzrReg.
struct WReg : public RReg {
constexpr explicit WReg(int index_)
: RReg(32, index_) {}
constexpr /* implicit */ WReg(WzrReg)
: RReg(32, 31) {}
template<typename Policy>
friend class BasicCodeGenerator;
};
// Index -1 marks SP/WSP, which has no XReg equivalent.
inline XReg RReg::toX() const
{
if (index() == -1)
throw "cannot convert SP/WSP to XReg";
return XReg{index()};
}
// Index -1 marks SP/WSP, which has no WReg equivalent.
inline WReg RReg::toW() const
{
if (index() == -1)
throw "cannot convert SP/WSP to WReg";
return WReg{index()};
}
// 64-bit stack pointer: index -1.
struct SpReg : public RReg {
    constexpr explicit SpReg()
        : RReg(64, -1) {}
};

// 32-bit stack pointer: index -1.
// Reports a bit size of 32, matching WRegWsp(WspReg), which constructs the same
// register as RReg(32, -1); it was previously constructed as a 64-bit register.
struct WspReg : public RReg {
    constexpr explicit WspReg()
        : RReg(32, -1) {}
};
// 64-bit operand slot that accepts SP or a numbered X register, but not the zero
// register: index 31 is rejected with an exception.
struct XRegSp : public RReg {
constexpr /* implicit */ XRegSp(SpReg)
: RReg(64, -1) {}
constexpr /* implicit */ XRegSp(XReg xr)
: RReg(64, xr.index())
{
if (xr.index() == 31)
throw "unexpected ZR passed into an XRegSp";
}
template<typename Policy>
friend class BasicCodeGenerator;
};
// 32-bit operand slot that accepts WSP or a numbered W register, but not the zero
// register: index 31 is rejected with an exception.
struct WRegWsp : public RReg {
constexpr /* implicit */ WRegWsp(WspReg)
: RReg(32, -1) {}
constexpr /* implicit */ WRegWsp(WReg wr)
: RReg(32, wr.index())
{
if (wr.index() == 31)
throw "unexpected WZR passed into an WRegWsp";
}
template<typename Policy>
friend class BasicCodeGenerator;
};
// Scalar view of a vector register; the bit size must be 8, 16, 32, 64 or 128
// (checked by assert only).
struct VReg : public Reg {
constexpr explicit VReg(unsigned bitsize_, int index_)
: Reg(true, bitsize_, index_)
{
assert(bitsize_ == 8 || bitsize_ == 16 || bitsize_ == 32 || bitsize_ == 64 || bitsize_ == 128);
}
// Re-type this register at another width, keeping the same index
// (defined after the concrete BReg..QReg types).
constexpr BReg toB() const;
constexpr HReg toH() const;
constexpr SReg toS() const;
constexpr DReg toD() const;
constexpr QReg toQ() const;
template<typename Policy>
friend class BasicCodeGenerator;
};
// Vector register with an arrangement: a total size of 64 or 128 bits split into
// elements of `esize_` bits (a power of two no larger than the total size).
// All constraints are checked by assert only.
struct VRegArranged : public Reg {
    constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_)
        : Reg(true, bitsize_, index_), m_esize(esize_)
    {
        assert(bitsize_ == 64 || bitsize_ == 128);
        assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two");
        assert(esize_ <= bitsize_);
    }

    template<typename Policy>
    friend class BasicCodeGenerator;

private:
    // Unsigned: element sizes go up to 128 (VReg_1Q), which does not fit in a signed
    // 8-bit field and would be stored as a negative number.
    unsigned m_esize : 8;
};
// Fixed-width scalar views of a vector register. Each type only fixes the bit size
// passed to VReg: B = 8, H = 16, S = 32, D = 64, Q = 128.
struct BReg : public VReg {
constexpr explicit BReg(int index_)
: VReg(8, index_)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct HReg : public VReg {
constexpr explicit HReg(int index_)
: VReg(16, index_)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct SReg : public VReg {
constexpr explicit SReg(int index_)
: VReg(32, index_)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct DReg : public VReg {
constexpr explicit DReg(int index_)
: VReg(64, index_)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct QReg : public VReg {
constexpr explicit QReg(int index_)
: VReg(128, index_)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
// Concrete vector arrangements. Each type fixes the total size (64 or 128 bits) and
// passes total_size / lane_count as the element size, e.g. VReg_8B is 64 bits split
// into 8 lanes of 8 bits and VReg_4S is 128 bits split into 4 lanes of 32 bits.
struct VReg_8B : public VRegArranged {
constexpr explicit VReg_8B(int reg_index_)
: VRegArranged(64, reg_index_, 64 / 8)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_4H : public VRegArranged {
constexpr explicit VReg_4H(int reg_index_)
: VRegArranged(64, reg_index_, 64 / 4)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_2S : public VRegArranged {
constexpr explicit VReg_2S(int reg_index_)
: VRegArranged(64, reg_index_, 64 / 2)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_1D : public VRegArranged {
constexpr explicit VReg_1D(int reg_index_)
: VRegArranged(64, reg_index_, 64 / 1)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_16B : public VRegArranged {
constexpr explicit VReg_16B(int reg_index_)
: VRegArranged(128, reg_index_, 128 / 16)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_8H : public VRegArranged {
constexpr explicit VReg_8H(int reg_index_)
: VRegArranged(128, reg_index_, 128 / 8)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_4S : public VRegArranged {
constexpr explicit VReg_4S(int reg_index_)
: VRegArranged(128, reg_index_, 128 / 4)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_2D : public VRegArranged {
constexpr explicit VReg_2D(int reg_index_)
: VRegArranged(128, reg_index_, 128 / 2)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct VReg_1Q : public VRegArranged {
constexpr explicit VReg_1Q(int reg_index_)
: VRegArranged(128, reg_index_, 128 / 1)
{}
template<typename Policy>
friend class BasicCodeGenerator;
};
// A single lane of a vector register: element size, register index and lane index.
// Construction throws "invalid elem_index" when the lane index is not below
// 128 / esize_.
// NOTE(review): the derived types pass 2/4/8 as esize_, which look like byte counts,
// while this bound divides 128 (a bit count) by it - so the check appears more
// permissive than the real lane count. Confirm the intended unit before tightening.
struct Elem {
    constexpr explicit Elem(unsigned esize_, int reg_, unsigned elem_index_)
        : m_esize(esize_), m_reg(reg_), m_elem_index(elem_index_)
    {
        const unsigned lane_limit = 128 / esize_;
        if (!(elem_index_ < lane_limit))
            throw "invalid elem_index";
    }

    constexpr unsigned esize() const
    {
        return m_esize;
    }

    constexpr int reg_index() const
    {
        return m_reg;
    }

    constexpr unsigned elem_index() const
    {
        return m_elem_index;
    }

private:
    unsigned m_esize;
    int m_reg;
    unsigned m_elem_index;
};
// Intermediate object that remembers a register index and produces a concrete lane
// of type E when indexed: selector[i] yields E{reg_index, i}.
template<typename E>
struct ElemSelector {
    constexpr explicit ElemSelector(int reg_index_)
        : m_reg_index(reg_index_)
    {}

    constexpr int reg_index() const
    {
        return m_reg_index;
    }

    constexpr E operator[](unsigned elem_index) const
    {
        return E{m_reg_index, elem_index};
    }

private:
    int m_reg_index;
};
// Concrete lane types. Each one only fixes the element size passed to Elem; the sizes
// follow the byte widths of the lanes: B = 1, H = 2, S = 4, D = 8.
// (BElem previously passed 2, the same value as HElem.)
struct BElem : public Elem {
    constexpr explicit BElem(int reg_, unsigned elem_index_)
        : Elem(1, reg_, elem_index_)
    {}
};

struct HElem : public Elem {
    constexpr explicit HElem(int reg_, unsigned elem_index_)
        : Elem(2, reg_, elem_index_)
    {}
};

struct SElem : public Elem {
    constexpr explicit SElem(int reg_, unsigned elem_index_)
        : Elem(4, reg_, elem_index_)
    {}
};

struct DElem : public Elem {
    constexpr explicit DElem(int reg_, unsigned elem_index_)
        : Elem(8, reg_, elem_index_)
    {}
};
// A DElem that is required to be lane 1; converting from any other lane throws
// "invalid DElem_1".
struct DElem_1 : public DElem {
constexpr /* implicit */ DElem_1(DElem inner)
: DElem(inner)
{
if (inner.elem_index() != 1)
throw "invalid DElem_1";
}
};
// Width conversions for VReg: each returns a register of the named width that keeps
// this register's index.
constexpr BReg VReg::toB() const
{
return BReg{index()};
}
constexpr HReg VReg::toH() const
{
return HReg{index()};
}
constexpr SReg VReg::toS() const
{
return SReg{index()};
}
constexpr DReg VReg::toD() const
{
return DReg{index()};
}
constexpr QReg VReg::toQ() const
{
return QReg{index()};
}
// Entry point for naming a vector register by index and then choosing how to view it.
struct VRegSelector {
constexpr explicit VRegSelector(int reg_index)
: m_reg_index(reg_index)
{}
constexpr int index() const { return m_reg_index; }
// Lane selectors: the result is indexed with [] to obtain a single B/H/S/D lane.
constexpr ElemSelector<BElem> B() const { return ElemSelector<BElem>(index()); }
constexpr ElemSelector<HElem> H() const { return ElemSelector<HElem>(index()); }
constexpr ElemSelector<SElem> S() const { return ElemSelector<SElem>(index()); }
constexpr ElemSelector<DElem> D() const { return ElemSelector<DElem>(index()); }
// 64-bit arrangements.
constexpr VReg_8B B8() const { return VReg_8B{index()}; }
constexpr VReg_4H H4() const { return VReg_4H{index()}; }
constexpr VReg_2S S2() const { return VReg_2S{index()}; }
constexpr VReg_1D D1() const { return VReg_1D{index()}; }
// 128-bit arrangements.
constexpr VReg_16B B16() const { return VReg_16B{index()}; }
constexpr VReg_8H H8() const { return VReg_8H{index()}; }
constexpr VReg_4S S4() const { return VReg_4S{index()}; }
constexpr VReg_2D D2() const { return VReg_2D{index()}; }
constexpr VReg_1Q Q1() const { return VReg_1Q{index()}; }
private:
int m_reg_index;
};
} // namespace oaknut

View file

@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm>
#include <cstddef>
namespace oaknut {
// Fixed-size copy of a string literal, including its terminating NUL, so that the
// text can be used as a non-type template argument.
//   size   - number of stored characters including the terminator
//   strlen - number of characters excluding the terminator
template<size_t N>
struct StringLiteral {
    constexpr StringLiteral(const char (&str)[N])
    {
        for (std::size_t pos = 0; pos < N; ++pos) {
            value[pos] = str[pos];
        }
    }

    static constexpr std::size_t strlen = N - 1;
    static constexpr std::size_t size = N;

    char value[N];
};
} // namespace oaknut

View file

@ -0,0 +1,306 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <bit>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <tuple>
#include <type_traits>
#include <variant>
#include <vector>
#include "oaknut/impl/enum.hpp"
#include "oaknut/impl/imm.hpp"
#include "oaknut/impl/list.hpp"
#include "oaknut/impl/multi_typed_name.hpp"
#include "oaknut/impl/offset.hpp"
#include "oaknut/impl/reg.hpp"
#include "oaknut/impl/string_literal.hpp"
namespace oaknut {
namespace detail {
// Builds a 32-bit mask from an instruction pattern string.
// Character i of `bs` (i = 0..31) corresponds to bit 31 - i; the bit is set when that
// character equals any character of `barg`.
template<StringLiteral bs, StringLiteral barg>
constexpr std::uint32_t get_bits()
{
    std::uint32_t mask = 0;
    for (std::size_t pos = 0; pos < 32; ++pos) {
        const char pattern_char = bs.value[pos];
        for (std::size_t k = 0; k < barg.strlen; ++k) {
            if (pattern_char != barg.value[k])
                continue;
            mask |= 1 << (31 - pos);
        }
    }
    return mask;
}
// Combines several callables into one object whose call operator is the overload set
// of all of them; used with std::visit to dispatch on a variant's active alternative.
template<class... Ts>
struct overloaded : Ts... {
using Ts::operator()...;
};
// Deduction guide so that overloaded{lambda1, lambda2, ...} works without spelling
// out the lambda types.
template<class... Ts>
overloaded(Ts...) -> overloaded<Ts...>;
} // namespace detail
// A code location that may be referenced before its address is known.
// A default-constructed Label is unresolved; the code generator resolves it and owns
// all access to its internals.
struct Label {
public:
Label() = default;
private:
template<typename Policy>
friend class BasicCodeGenerator;
// Creates an already-resolved label.
explicit Label(std::uintptr_t addr)
: m_addr(addr)
{}
// Computes the bits to patch into an instruction at wb_addr once the label's
// address is known.
using EmitFunctionType = std::uint32_t (*)(std::uintptr_t wb_addr, std::uintptr_t resolved_addr);
// A pending patch: where to write, which bits the patch covers, and how to
// compute the value.
struct Writeback {
std::uintptr_t m_wb_addr;
std::uint32_t m_mask;
EmitFunctionType m_fn;
};
// Empty until the label is resolved.
std::optional<std::uintptr_t> m_addr;
// Patches recorded while the label was unresolved.
std::vector<Writeback> m_wbs;
};
template<typename Policy>
class BasicCodeGenerator : public Policy {
public:
// Forwards the policy's constructor argument to the Policy base class.
BasicCodeGenerator(typename Policy::constructor_argument_type arg)
: Policy(arg)
{}
// Returns a label that is already resolved to the current address.
Label l()
{
return Label{Policy::current_address()};
}
// Binds an unresolved label to the current address and applies every patch that was
// recorded against it, then discards the patch list.
// Throws "label already resolved" when the label already has an address.
void l(Label& label)
{
    if (label.m_addr.has_value())
        throw "label already resolved";

    const auto here = Policy::current_address();
    label.m_addr = here;

    for (const auto& pending : label.m_wbs) {
        const std::uint32_t patch = pending.m_fn(pending.m_wb_addr, here);
        Policy::set_at_address(pending.m_wb_addr, patch, pending.m_mask);
    }
    label.m_wbs.clear();
}
#include "oaknut/impl/arm64_mnemonics.inc.hpp"
#include "oaknut/impl/fpsimd_mnemonics.inc.hpp"
void RET()
{
return RET(XReg{30});
}
// Loads an arbitrary 32-bit immediate into wd using the shortest sequence found:
//   1. nothing at all when wd is the zero register (index 31),
//   2. a single MOVZ when only one 16-bit half is non-zero,
//   3. a single MOVN when only one 16-bit half of the inverted value is non-zero,
//   4. a single ORR with WZR when the value is a valid bitmask immediate,
//   5. otherwise MOVZ (low half) followed by MOVK (high half).
void MOV(WReg wd, uint32_t imm)
{
    if (wd.index() == 31)
        return;
    if (MovImm16::is_valid(imm))
        return MOVZ(wd, imm);

    // Invert within 32 bits: widening first would set the upper 32 bits of the
    // inverted value, which can never be a valid MovImm16. MOVN takes the inverted
    // immediate as its operand.
    const std::uint32_t inverted = ~imm;
    if (MovImm16::is_valid(inverted))
        return MOVN(wd, inverted);

    if (detail::encode_bit_imm(imm))
        return ORR(wd, WzrReg{}, imm);

    MOVZ(wd, {static_cast<std::uint16_t>(imm >> 0), MovImm16Shift::SHL_0});
    MOVK(wd, {static_cast<std::uint16_t>(imm >> 16), MovImm16Shift::SHL_16});
}
// Loads an arbitrary 64-bit immediate into xd using the shortest sequence found:
//   1. nothing at all when xd is the zero register (index 31),
//   2. the 32-bit MOV on the W view when the upper 32 bits are zero,
//   3. a single MOVZ / MOVN / ORR when the value (or its inverse) allows it,
//   4. otherwise a MOVZ-or-ORR for the first part followed by one MOVK per remaining
//      non-zero 16-bit chunk.
void MOV(XReg xd, uint64_t imm)
{
    if (xd.index() == 31)
        return;
    if (imm >> 32 == 0)
        return MOV(xd.toW(), static_cast<std::uint32_t>(imm));
    if (MovImm16::is_valid(imm))
        return MOVZ(xd, imm);
    // MOVN takes the inverted immediate as its operand. Passing `imm` itself would
    // make the MovImm16 conversion throw, because only ~imm is known to be encodable.
    if (MovImm16::is_valid(~imm))
        return MOVN(xd, ~imm);
    if (detail::encode_bit_imm(imm))
        return ORR(xd, ZrReg{}, imm);

    bool movz_done = false;
    int shift_count = 0;

    // If the low word is a bitmask immediate, one 32-bit ORR materialises it (and
    // clears the upper word), leaving only chunks 2 and 3 for MOVK.
    if (detail::encode_bit_imm(static_cast<std::uint32_t>(imm))) {
        ORR(xd.toW(), WzrReg{}, static_cast<std::uint32_t>(imm));
        imm >>= 32;
        movz_done = true;
        shift_count = 2;
    }

    while (imm != 0) {
        const uint16_t hw = static_cast<uint16_t>(imm);
        if (hw != 0) {
            if (movz_done) {
                MOVK(xd, {hw, static_cast<MovImm16Shift>(shift_count)});
            } else {
                // The first non-zero chunk uses MOVZ so the other chunks start at zero.
                MOVZ(xd, {hw, static_cast<MovImm16Shift>(shift_count)});
                movz_done = true;
            }
        }
        imm >>= 16;
        shift_count++;
    }
}
private:
#include "oaknut/impl/arm64_encode_helpers.inc.hpp"
template<StringLiteral bs, StringLiteral... bargs, typename... Ts>
void emit(Ts... args)
{
std::uint32_t encoding = detail::get_bits<bs, "1">();
encoding |= (0 | ... | encode<detail::get_bits<bs, bargs>()>(std::forward<Ts>(args)));
Policy::append(encoding);
}
template<std::uint32_t splat, std::size_t size, std::size_t align>
std::uint32_t encode(AddrOffset<size, align> v)
{
static_assert(std::popcount(splat) == size - align);
const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
const std::ptrdiff_t diff = target - current_addr;
return pdep<splat>(AddrOffset<size, align>::encode(diff));
};
return std::visit(detail::overloaded{
[&](std::uint32_t encoding) {
return pdep<splat>(encoding);
},
[&](Label* label) {
if (label->m_addr) {
return encode_fn(Policy::current_address(), *label->m_addr);
}
label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
return 0u;
},
[&](void* p) {
return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
},
},
v.m_payload);
}
template<std::uint32_t splat, std::size_t size>
std::uint32_t encode(PageOffset<size> v)
{
static_assert(std::popcount(splat) == size);
const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
return pdep<splat>(PageOffset<size>::encode(current_addr, target));
};
return std::visit(detail::overloaded{
[&](Label* label) {
if (label->m_addr) {
return encode_fn(Policy::current_address(), *label->m_addr);
}
label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
return 0u;
},
[&](void* p) {
return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
},
},
v.m_payload);
}
};
/// Storage policy that emits instruction words through a raw
/// `std::uint32_t*` cursor. The policy keeps only the cursor itself; it
/// performs no bounds checking on the memory it writes to.
struct PointerCodeGeneratorPolicy {
    /// Returns the cursor reinterpreted as the pointer type `T`.
    template<typename T>
    T ptr()
    {
        static_assert(std::is_pointer_v<T>);
        return reinterpret_cast<T>(m_ptr);
    }

    /// Repositions the cursor; subsequent words are written starting at `ptr_`.
    void set_ptr(std::uint32_t* ptr_)
    {
        m_ptr = ptr_;
    }

protected:
    using constructor_argument_type = std::uint32_t*;

    PointerCodeGeneratorPolicy(std::uint32_t* ptr_)
        : m_ptr{ptr_}
    {}

    /// Stores one instruction word at the cursor and advances by one word.
    void append(std::uint32_t instruction)
    {
        *m_ptr = instruction;
        ++m_ptr;
    }

    /// Address at which the next appended word will be stored.
    std::uintptr_t current_address()
    {
        const auto address = reinterpret_cast<std::uintptr_t>(m_ptr);
        return address;
    }

    /// Rewrites the word at `addr`: bits selected by `mask` are kept, then
    /// `value` is OR-ed in.
    void set_at_address(std::uintptr_t addr, std::uint32_t value, std::uint32_t mask)
    {
        auto* const word = reinterpret_cast<std::uint32_t*>(addr);
        const std::uint32_t kept_bits = *word & mask;
        *word = kept_bits | value;
    }

private:
    std::uint32_t* m_ptr;
};
// Convenience alias: the code generator writing through a raw std::uint32_t* cursor.
using CodeGenerator = BasicCodeGenerator<PointerCodeGeneratorPolicy>;
// Short operand names (registers, condition codes, extend/shift selectors,
// addressing-mode tags) for use at call sites, e.g. after
// `using namespace oaknut::util;` one can write `code.MOV(W0, 42)`.
namespace util {
// General-purpose registers W0-W30 / X0-X30. Index 31 gets no numbered name
// here; the zero register and stack pointer have their own types below.
inline constexpr WReg W0{0}, W1{1}, W2{2}, W3{3}, W4{4}, W5{5}, W6{6}, W7{7}, W8{8}, W9{9}, W10{10}, W11{11}, W12{12}, W13{13}, W14{14}, W15{15}, W16{16}, W17{17}, W18{18}, W19{19}, W20{20}, W21{21}, W22{22}, W23{23}, W24{24}, W25{25}, W26{26}, W27{27}, W28{28}, W29{29}, W30{30};
inline constexpr XReg X0{0}, X1{1}, X2{2}, X3{3}, X4{4}, X5{5}, X6{6}, X7{7}, X8{8}, X9{9}, X10{10}, X11{11}, X12{12}, X13{13}, X14{14}, X15{15}, X16{16}, X17{17}, X18{18}, X19{19}, X20{20}, X21{21}, X22{22}, X23{23}, X24{24}, X25{25}, X26{26}, X27{27}, X28{28}, X29{29}, X30{30};
// Zero registers and stack pointers (two spellings each for the 64-bit forms).
inline constexpr ZrReg ZR{}, XZR{};
inline constexpr WzrReg WZR{};
inline constexpr SpReg SP{}, XSP{};
inline constexpr WspReg WSP{};
// Vector register selectors V0-V31; call sites pick an arrangement or element
// from them, e.g. `V3.B16()` or `V22.B()[6]`.
inline constexpr VRegSelector V0{0}, V1{1}, V2{2}, V3{3}, V4{4}, V5{5}, V6{6}, V7{7}, V8{8}, V9{9}, V10{10}, V11{11}, V12{12}, V13{13}, V14{14}, V15{15}, V16{16}, V17{17}, V18{18}, V19{19}, V20{20}, V21{21}, V22{22}, V23{23}, V24{24}, V25{25}, V26{26}, V27{27}, V28{28}, V29{29}, V30{30}, V31{31};
// Scalar SIMD&FP register names Q/D/S/H/B 0-31.
inline constexpr QReg Q0{0}, Q1{1}, Q2{2}, Q3{3}, Q4{4}, Q5{5}, Q6{6}, Q7{7}, Q8{8}, Q9{9}, Q10{10}, Q11{11}, Q12{12}, Q13{13}, Q14{14}, Q15{15}, Q16{16}, Q17{17}, Q18{18}, Q19{19}, Q20{20}, Q21{21}, Q22{22}, Q23{23}, Q24{24}, Q25{25}, Q26{26}, Q27{27}, Q28{28}, Q29{29}, Q30{30}, Q31{31};
inline constexpr DReg D0{0}, D1{1}, D2{2}, D3{3}, D4{4}, D5{5}, D6{6}, D7{7}, D8{8}, D9{9}, D10{10}, D11{11}, D12{12}, D13{13}, D14{14}, D15{15}, D16{16}, D17{17}, D18{18}, D19{19}, D20{20}, D21{21}, D22{22}, D23{23}, D24{24}, D25{25}, D26{26}, D27{27}, D28{28}, D29{29}, D30{30}, D31{31};
inline constexpr SReg S0{0}, S1{1}, S2{2}, S3{3}, S4{4}, S5{5}, S6{6}, S7{7}, S8{8}, S9{9}, S10{10}, S11{11}, S12{12}, S13{13}, S14{14}, S15{15}, S16{16}, S17{17}, S18{18}, S19{19}, S20{20}, S21{21}, S22{22}, S23{23}, S24{24}, S25{25}, S26{26}, S27{27}, S28{28}, S29{29}, S30{30}, S31{31};
inline constexpr HReg H0{0}, H1{1}, H2{2}, H3{3}, H4{4}, H5{5}, H6{6}, H7{7}, H8{8}, H9{9}, H10{10}, H11{11}, H12{12}, H13{13}, H14{14}, H15{15}, H16{16}, H17{17}, H18{18}, H19{19}, H20{20}, H21{21}, H22{22}, H23{23}, H24{24}, H25{25}, H26{26}, H27{27}, H28{28}, H29{29}, H30{30}, H31{31};
inline constexpr BReg B0{0}, B1{1}, B2{2}, B3{3}, B4{4}, B5{5}, B6{6}, B7{7}, B8{8}, B9{9}, B10{10}, B11{11}, B12{12}, B13{13}, B14{14}, B15{15}, B16{16}, B17{17}, B18{18}, B19{19}, B20{20}, B21{21}, B22{22}, B23{23}, B24{24}, B25{25}, B26{26}, B27{27}, B28{28}, B29{29}, B30{30}, B31{31};
// Condition codes, one constant per Cond enumerator of the same name.
inline constexpr Cond EQ{Cond::EQ}, NE{Cond::NE}, CS{Cond::CS}, CC{Cond::CC}, MI{Cond::MI}, PL{Cond::PL}, VS{Cond::VS}, VC{Cond::VC}, HI{Cond::HI}, LS{Cond::LS}, GE{Cond::GE}, LT{Cond::LT}, GT{Cond::GT}, LE{Cond::LE}, AL{Cond::AL}, NV{Cond::NV}, HS{Cond::HS}, LO{Cond::LO};
// Extend and shift selectors. Each name is tied to one or more enum values
// through MultiTypedName; presumably this lets a single spelling such as LSL
// be passed wherever any of the listed enum types is expected (TODO confirm
// against the MultiTypedName definition).
inline constexpr auto UXTB{MultiTypedName<AddSubExt::UXTB>{}};
inline constexpr auto UXTH{MultiTypedName<AddSubExt::UXTH>{}};
inline constexpr auto UXTW{MultiTypedName<AddSubExt::UXTW, IndexExt::UXTW>{}};
inline constexpr auto UXTX{MultiTypedName<AddSubExt::UXTX>{}};
inline constexpr auto SXTB{MultiTypedName<AddSubExt::SXTB>{}};
inline constexpr auto SXTH{MultiTypedName<AddSubExt::SXTH>{}};
inline constexpr auto SXTW{MultiTypedName<AddSubExt::SXTW, IndexExt::SXTW>{}};
inline constexpr auto SXTX{MultiTypedName<AddSubExt::SXTX, IndexExt::SXTX>{}};
inline constexpr auto LSL{MultiTypedName<AddSubExt::LSL, IndexExt::LSL, AddSubShift::LSL, LogShift::LSL, LslSymbol::LSL>{}};
inline constexpr auto LSR{MultiTypedName<AddSubShift::LSR, LogShift::LSR>{}};
inline constexpr auto ASR{MultiTypedName<AddSubShift::ASR, LogShift::ASR>{}};
inline constexpr auto ROR{MultiTypedName<LogShift::ROR>{}};
// Tag values that select the post-/pre-indexed addressing overloads,
// e.g. `code.STP(X29, X30, SP, PRE_INDEXED, -32)`.
inline constexpr PostIndexed POST_INDEXED{};
inline constexpr PreIndexed PRE_INDEXED{};
// MSL shift selector, used as in `MOVI(V28.S2(), 166, MSL, 8)`.
inline constexpr MslSymbol MSL{MslSymbol::MSL};
} // namespace util
} // namespace oaknut

View file

@ -0,0 +1,117 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <cstdint>
#include <cstdio>
#include <catch2/catch_test_macros.hpp>
#include "oaknut/code_block.hpp"
#include "oaknut/oaknut.hpp"
#include "rand_int.hpp"
using namespace oaknut;
using namespace oaknut::util;
TEST_CASE("Basic Test")
{
    // Smoke test: assemble `MOV W0, 42; RET`, call it, and check the result.
    using EntryPoint = int (*)();

    CodeBlock mem{4096};
    CodeGenerator code{mem.ptr()};

    // Emission is bracketed by unprotect()/protect(); invalidate_all() runs
    // before the generated code is called.
    mem.unprotect();
    code.MOV(W0, 42);
    code.RET();
    mem.protect();
    mem.invalidate_all();

    // The routine starts at the beginning of the block.
    const auto entry = reinterpret_cast<EntryPoint>(mem.ptr());
    int result = entry();

    REQUIRE(result == 42);
}
TEST_CASE("Fibonacci")
{
// Assembles a recursive Fibonacci routine (exercising forward and backward
// label references) and checks it for n = 0, 1, 5 and 9.
CodeBlock mem{4096};
CodeGenerator code{mem.ptr()};
mem.unprotect();
// Entry point: the generator's position before anything has been emitted.
auto fib = code.ptr<int (*)(int)>();
Label start, end, zero, recurse;
code.l(start);
// Prologue: reserve 32 bytes of stack, saving X29/X30 at [SP] and X20/X19 at
// [SP, 16], then set X29 to the new SP.
code.STP(X29, X30, SP, PRE_INDEXED, -32);
code.STP(X20, X19, SP, 16);
code.MOV(X29, SP);
// W19 = n (kept across the calls); W0 = n - 1, with flags set for the branches.
code.MOV(W19, W0);
code.SUBS(W0, W0, 1);
// n - 1 < 0  -> result 0;  n - 1 != 0 -> recurse;  otherwise n == 1 -> result 1.
code.B(LT, zero);
code.B(NE, recurse);
code.MOV(W0, 1);
code.B(end);
code.l(zero);
code.MOV(W0, WZR);
code.B(end);
code.l(recurse);
// W0 still holds n - 1 here: W20 = fib(n - 1), then W0 = fib(n - 2) + W20.
code.BL(start);
code.MOV(W20, W0);
code.SUB(W0, W19, 2);
code.BL(start);
code.ADD(W0, W0, W20);
code.l(end);
// Epilogue: restore the saved registers, release the 32 bytes, and return.
code.LDP(X20, X19, SP, 16);
code.LDP(X29, X30, SP, POST_INDEXED, 32);
code.RET();
mem.protect();
mem.invalidate_all();
REQUIRE(fib(0) == 0);
REQUIRE(fib(1) == 1);
REQUIRE(fib(5) == 5);
REQUIRE(fib(9) == 34);
}
TEST_CASE("Immediate generation (32-bit)")
{
    // Repeatedly assembles `MOV W0, <random 32-bit value>; RET` into one
    // reused block and checks that calling it returns the same value.
    constexpr int iteration_count = 0x100000;

    CodeBlock mem{4096};

    for (int iteration = 0; iteration < iteration_count; ++iteration) {
        const std::uint32_t value = RandInt<std::uint32_t>(0, 0xffffffff);

        // A fresh generator per round starts emitting at mem.ptr() again.
        CodeGenerator code{mem.ptr()};
        auto f = code.ptr<std::uint64_t (*)()>();

        mem.unprotect();
        code.MOV(W0, value);
        code.RET();
        mem.protect();
        mem.invalidate_all();

        REQUIRE(f() == value);
    }
}
TEST_CASE("Immediate generation (64-bit)")
{
    // Repeatedly assembles `MOV X0, <random 64-bit value>; RET` into one
    // reused block and checks that calling it returns the same value.
    constexpr int iteration_count = 0x100000;

    CodeBlock mem{4096};

    for (int iteration = 0; iteration < iteration_count; ++iteration) {
        const std::uint64_t value = RandInt<std::uint64_t>(0, 0xffffffff'ffffffff);

        // A fresh generator per round starts emitting at mem.ptr() again.
        CodeGenerator code{mem.ptr()};
        auto f = code.ptr<std::uint64_t (*)()>();

        mem.unprotect();
        code.MOV(X0, value);
        code.RET();
        mem.protect();
        mem.invalidate_all();

        REQUIRE(f() == value);
    }
}

View file

@ -0,0 +1,785 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#include <array>
#include <cstdint>
#include <catch2/catch_test_macros.hpp>
#include "oaknut/oaknut.hpp"
// T(HEX, CMD): defines one Catch2 test case, named after the stringified call
// CMD, that emits `code.CMD` into a single 32-bit word and requires that word
// to equal HEX.
//
// `result` is zero-initialised so that a CMD which emits nothing is compared
// against a defined value instead of an indeterminate one. The generator
// writes straight into `result`, which has room for one word only, so CMD
// must emit exactly one instruction.
#define T(HEX, CMD)                   \
    TEST_CASE(#CMD)                   \
    {                                 \
        using namespace oaknut;       \
        using namespace oaknut::util; \
                                      \
        std::uint32_t result = 0;     \
        CodeGenerator code{&result};  \
                                      \
        code.CMD;                     \
                                      \
        REQUIRE(result == HEX);       \
    }
// Encoding vectors, ABS through EXT (integer arithmetic, bitwise and compare
// forms). Each entry pairs the expected 32-bit instruction word with the
// generator call that must produce it.
// NOTE(review): bare `// MNEMONIC` lines carry no vector of their own;
// presumably they mark forms that still need coverage - confirm.
T(0x5ee0bb61, ABS(D1, D27))
T(0x4e20ba03, ABS(V3.B16(), V16.B16()))
T(0x5ef98449, ADD(D9, D2, D25))
T(0x4eef8697, ADD(V23.D2(), V20.D2(), V15.D2()))
T(0x0eb743d1, ADDHN(V17.S2(), V30.D2(), V23.D2()))
T(0x5ef1b933, ADDP(D19, V9.D2()))
T(0x0e7ebf6e, ADDP(V14.H4(), V27.H4(), V30.H4()))
T(0x4e31ba47, ADDV(B7, V18.B16()))
// AESD
// AESE
// AESIMC
// AESMC
T(0x4e2b1d4c, AND(V12.B16(), V10.B16(), V11.B16()))
T(0x6f01b7f4, BIC(V20.H8(), 63, LSL, 8))
T(0x2f017752, BIC(V18.S2(), 58, LSL, 24))
T(0x0e751c85, BIC(V5.B8(), V4.B8(), V21.B8()))
T(0x2ef11d4d, BIF(V13.B8(), V10.B8(), V17.B8()))
T(0x2eb31f3b, BIT(V27.B8(), V25.B8(), V19.B8()))
T(0x2e711ed8, BSL(V24.B8(), V22.B8(), V17.B8()))
T(0x0e604aaf, CLS(V15.H4(), V21.H4()))
T(0x6e604808, CLZ(V8.H8(), V0.H8()))
T(0x7eff8ec2, CMEQ(D2, D22, D31))
T(0x2e2b8d57, CMEQ(V23.B8(), V10.B8(), V11.B8()))
T(0x5ee09bbf, CMEQ(D31, D29, 0))
T(0x4ea09876, CMEQ(V22.S4(), V3.S4(), 0))
T(0x5ef23c04, CMGE(D4, D0, D18))
T(0x4e203c6f, CMGE(V15.B16(), V3.B16(), V0.B16()))
T(0x7ee08822, CMGE(D2, D1, 0))
T(0x2ea08bb9, CMGE(V25.S2(), V29.S2(), 0))
T(0x5ef036a5, CMGT(D5, D21, D16))
T(0x0eb7358b, CMGT(V11.S2(), V12.S2(), V23.S2()))
T(0x5ee08957, CMGT(D23, D10, 0))
T(0x4ea088eb, CMGT(V11.S4(), V7.S4(), 0))
T(0x7ee235f2, CMHI(D18, D15, D2))
T(0x6e243596, CMHI(V22.B16(), V12.B16(), V4.B16()))
T(0x7ef23faf, CMHS(D15, D29, D18))
T(0x2e2d3d8a, CMHS(V10.B8(), V12.B8(), V13.B8()))
T(0x7ee098a4, CMLE(D4, D5, 0))
T(0x2e2098d3, CMLE(V19.B8(), V6.B8(), 0))
T(0x5ee0a980, CMLT(D0, D12, 0))
T(0x4e60a892, CMLT(V18.H8(), V4.H8(), 0))
T(0x5ee18e03, CMTST(D3, D16, D1))
T(0x4e708f65, CMTST(V5.H8(), V27.H8(), V16.H8()))
T(0x4e20598d, CNT(V13.B16(), V12.B16()))
// DUP
T(0x0e0d06cd, DUP(V13.B8(), V22.B()[6]))
T(0x0e010fe7, DUP(V7.B8(), WZR))
T(0x2e2b1e6e, EOR(V14.B8(), V19.B8(), V11.B8()))
T(0x6e1c0a35, EXT(V21.B16(), V17.B16(), V28.B16(), 1))
// Encoding vectors, FABD through FCVT: floating-point arithmetic, compares
// (register and against 0.0), conditional compare/select and precision
// conversion, in scalar and vector forms.
T(0x7ea0d7a4, FABD(S4, S29, S0))
T(0x6eecd418, FABD(V24.D2(), V0.D2(), V12.D2()))
T(0x1e20c299, FABS(S25, S20))
T(0x1e60c114, FABS(D20, D8))
T(0x4ee0f999, FABS(V25.D2(), V12.D2()))
T(0x7e71ef5b, FACGE(D27, D26, D17))
T(0x6e6eed17, FACGE(V23.D2(), V8.D2(), V14.D2()))
T(0x7ef8efc0, FACGT(D0, D30, D24))
T(0x6eb0ec54, FACGT(V20.S4(), V2.S4(), V16.S4()))
T(0x1e242b23, FADD(S3, S25, S4))
T(0x1e672a8b, FADD(D11, D20, D7))
T(0x4e34d46b, FADD(V11.S4(), V3.S4(), V20.S4()))
T(0x7e30db16, FADDP(S22, V24.S2()))
T(0x6e21d626, FADDP(V6.S4(), V17.S4(), V1.S4()))
T(0x1e23c4ce, FCCMP(S6, S3, 14, GT))
T(0x1e7104aa, FCCMP(D5, D17, 10, EQ))
T(0x1e28c678, FCCMPE(S19, S8, 8, GT))
T(0x1e6195dd, FCCMPE(D14, D1, 13, LS))
T(0x5e31e659, FCMEQ(S25, S18, S17))
T(0x0e27e7ba, FCMEQ(V26.S2(), V29.S2(), V7.S2()))
T(0x5ea0da1d, FCMEQ(S29, S16, 0.0))
T(0x4ee0db8a, FCMEQ(V10.D2(), V28.D2(), 0.0))
T(0x7e2de473, FCMGE(S19, S3, S13))
T(0x2e33e726, FCMGE(V6.S2(), V25.S2(), V19.S2()))
T(0x7ea0c8d3, FCMGE(S19, S6, 0.0))
T(0x6ea0ca7b, FCMGE(V27.S4(), V19.S4(), 0.0))
T(0x7eb7e65d, FCMGT(S29, S18, S23))
T(0x6ef0e6ac, FCMGT(V12.D2(), V21.D2(), V16.D2()))
T(0x5ee0cb5a, FCMGT(D26, D26, 0.0))
T(0x4ea0c917, FCMGT(V23.S4(), V8.S4(), 0.0))
T(0x7ea0dbe1, FCMLE(S1, S31, 0.0))
T(0x6ea0da69, FCMLE(V9.S4(), V19.S4(), 0.0))
T(0x5ea0ea5f, FCMLT(S31, S18, 0.0))
T(0x4ee0e8de, FCMLT(V30.D2(), V6.D2(), 0.0))
T(0x1e322040, FCMP(S2, S18))
T(0x1e202248, FCMP(S18, 0.0))
T(0x1e6520a0, FCMP(D5, D5))
T(0x1e602108, FCMP(D8, 0.0))
T(0x1e332370, FCMPE(S27, S19))
T(0x1e202018, FCMPE(S0, 0.0))
T(0x1e7120b0, FCMPE(D5, D17))
T(0x1e602298, FCMPE(D20, 0.0))
T(0x1e32ed68, FCSEL(S8, S11, S18, AL))
T(0x1e7b1e21, FCSEL(D1, D17, D27, NE))
T(0x1ee24022, FCVT(S2, H1))
T(0x1ee2c33c, FCVT(D28, H25))
T(0x1e23c379, FCVT(H25, S27))
T(0x1e22c1a5, FCVT(D5, S13))
T(0x1e63c2b1, FCVT(H17, D21))
T(0x1e624309, FCVT(S9, D24))
// Encoding vectors, FCVTAS through FCVTZU: floating-point to integer
// conversions. Each mnemonic is covered with general-register destinations
// (W/X), scalar SIMD destinations and vector arrangements where present;
// FCVTZS/FCVTZU additionally have forms taking a trailing integer operand.
T(0x1e2400a0, FCVTAS(W0, S5))
T(0x9e24000e, FCVTAS(X14, S0))
T(0x1e640191, FCVTAS(W17, D12))
T(0x9e6403d6, FCVTAS(X22, D30))
T(0x5e21c8a7, FCVTAS(S7, S5))
T(0x0e21c8df, FCVTAS(V31.S2(), V6.S2()))
T(0x1e25036b, FCVTAU(W11, S27))
T(0x9e25030c, FCVTAU(X12, S24))
T(0x1e65002e, FCVTAU(W14, D1))
T(0x9e65003e, FCVTAU(X30, D1))
T(0x7e61cabd, FCVTAU(D29, D21))
T(0x2e21c880, FCVTAU(V0.S2(), V4.S2()))
T(0x4e217b66, FCVTL2(V6.S4(), V27.H8()))
T(0x1e30016d, FCVTMS(W13, S11))
T(0x9e3002b5, FCVTMS(X21, S21))
T(0x1e7003dd, FCVTMS(W29, D30))
T(0x9e700080, FCVTMS(X0, D4))
T(0x5e21b9b6, FCVTMS(S22, S13))
T(0x4e61ba4e, FCVTMS(V14.D2(), V18.D2()))
T(0x1e31002d, FCVTMU(W13, S1))
T(0x9e310281, FCVTMU(X1, S20))
T(0x1e71000e, FCVTMU(W14, D0))
T(0x9e710010, FCVTMU(X16, D0))
T(0x7e61bb3b, FCVTMU(D27, D25))
T(0x2e21b918, FCVTMU(V24.S2(), V8.S2()))
T(0x0e616a68, FCVTN(V8.S2(), V19.D2()))
T(0x1e200100, FCVTNS(W0, S8))
T(0x9e20037f, FCVTNS(XZR, S27))
T(0x1e60015e, FCVTNS(W30, D10))
T(0x9e600018, FCVTNS(X24, D0))
T(0x5e61a846, FCVTNS(D6, D2))
T(0x4e21aa81, FCVTNS(V1.S4(), V20.S4()))
T(0x1e210248, FCVTNU(W8, S18))
T(0x9e2103da, FCVTNU(X26, S30))
T(0x1e610120, FCVTNU(W0, D9))
T(0x9e61013a, FCVTNU(X26, D9))
T(0x7e61aaba, FCVTNU(D26, D21))
T(0x6e21aa16, FCVTNU(V22.S4(), V16.S4()))
T(0x1e28010d, FCVTPS(W13, S8))
T(0x9e2803df, FCVTPS(XZR, S30))
T(0x1e6802e9, FCVTPS(W9, D23))
T(0x9e6801f7, FCVTPS(X23, D15))
T(0x5ee1a986, FCVTPS(D6, D12))
T(0x4ea1aa32, FCVTPS(V18.S4(), V17.S4()))
T(0x1e29022b, FCVTPU(W11, S17))
T(0x9e290381, FCVTPU(X1, S28))
T(0x1e690095, FCVTPU(W21, D4))
T(0x9e6902b3, FCVTPU(X19, D21))
T(0x7ea1abbb, FCVTPU(S27, S29))
T(0x6ee1ab06, FCVTPU(V6.D2(), V24.D2()))
T(0x7e61687a, FCVTXN(S26, D3))
T(0x2e61694c, FCVTXN(V12.S2(), V10.D2()))
T(0x1e18c4d6, FCVTZS(W22, S6, 15))
T(0x9e18d131, FCVTZS(X17, S9, 12))
T(0x1e58fd9b, FCVTZS(W27, D12, 1))
T(0x9e5899ee, FCVTZS(X14, D15, 26))
T(0x1e380091, FCVTZS(W17, S4))
T(0x9e380289, FCVTZS(X9, S20))
T(0x1e780117, FCVTZS(W23, D8))
T(0x9e7800f5, FCVTZS(X21, D7))
T(0x5f2fffdb, FCVTZS(S27, S30, 17))
T(0x4f65ff65, FCVTZS(V5.D2(), V27.D2(), 27))
T(0x5ee1b932, FCVTZS(D18, D9))
T(0x4ee1ba41, FCVTZS(V1.D2(), V18.D2()))
T(0x1e19b5d8, FCVTZU(W24, S14, 19))
T(0x9e199462, FCVTZU(X2, S3, 27))
T(0x1e59fca1, FCVTZU(W1, D5, 1))
T(0x9e599bbd, FCVTZU(X29, D29, 26))
T(0x1e3900f6, FCVTZU(W22, S7))
T(0x9e3900b7, FCVTZU(X23, S5))
T(0x1e79031a, FCVTZU(W26, D24))
T(0x9e790248, FCVTZU(X8, D18))
T(0x7f5afd37, FCVTZU(D23, D9, 38))
T(0x2f34fd38, FCVTZU(V24.S2(), V9.S2(), 12))
T(0x7ea1baa3, FCVTZU(S3, S21))
T(0x6ee1b8c1, FCVTZU(V1.D2(), V6.D2()))
// Encoding vectors, FDIV through FSUB: floating-point divide, fused
// multiply-add, min/max and their pairwise/across-vector variants,
// by-element multiplies, moves (including 8-bit immediates), estimates,
// rounding and square root, in scalar and vector forms.
T(0x1e3d1999, FDIV(S25, S12, S29))
T(0x1e7e1a4e, FDIV(D14, D18, D30))
T(0x2e2cfe45, FDIV(V5.S2(), V18.S2(), V12.S2()))
T(0x1f114362, FMADD(S2, S27, S17, S16))
T(0x1f482240, FMADD(D0, D18, D8, D8))
T(0x1e234b5f, FMAX(S31, S26, S3))
T(0x1e694894, FMAX(D20, D4, D9))
T(0x4e29f568, FMAX(V8.S4(), V11.S4(), V9.S4()))
T(0x1e2f6a40, FMAXNM(S0, S18, S15))
T(0x1e6d6a99, FMAXNM(D25, D20, D13))
T(0x4e2dc6da, FMAXNM(V26.S4(), V22.S4(), V13.S4()))
T(0x7e30c9b9, FMAXNMP(S25, V13.S2()))
T(0x6e36c794, FMAXNMP(V20.S4(), V28.S4(), V22.S4()))
T(0x6e30c8f6, FMAXNMV(S22, V7.S4()))
T(0x7e30f8dd, FMAXP(S29, V6.S2()))
T(0x6e61f4ab, FMAXP(V11.D2(), V5.D2(), V1.D2()))
T(0x6e30fb85, FMAXV(S5, V28.S4()))
T(0x1e3c5aae, FMIN(S14, S21, S28))
T(0x1e7f58f8, FMIN(D24, D7, D31))
T(0x0eb0f63b, FMIN(V27.S2(), V17.S2(), V16.S2()))
T(0x1e317886, FMINNM(S6, S4, S17))
T(0x1e6e7a5d, FMINNM(D29, D18, D14))
T(0x4ea4c44c, FMINNM(V12.S4(), V2.S4(), V4.S4()))
T(0x7ef0c895, FMINNMP(D21, V4.D2()))
T(0x6efbc4e3, FMINNMP(V3.D2(), V7.D2(), V27.D2()))
T(0x6eb0c93d, FMINNMV(S29, V9.S4()))
T(0x7ef0fa13, FMINP(D19, V16.D2()))
T(0x2eb4f4ac, FMINP(V12.S2(), V5.S2(), V20.S2()))
T(0x6eb0f801, FMINV(S1, V0.S4()))
T(0x5f8219a6, FMLA(S6, S13, V2.S()[2]))
T(0x4fc512a1, FMLA(V1.D2(), V21.D2(), V5.D()[0]))
T(0x4e6bcecf, FMLA(V15.D2(), V22.D2(), V11.D2()))
T(0x5f8a5094, FMLS(S20, S4, V10.S()[0]))
T(0x4fd85b79, FMLS(V25.D2(), V27.D2(), V24.D()[1]))
T(0x0ebacca4, FMLS(V4.S2(), V5.S2(), V26.S2()))
T(0x1e270027, FMOV(S7, W1))
T(0x1e260164, FMOV(W4, S11))
T(0x9e670008, FMOV(D8, X0))
T(0x9eaf03e0, FMOV(V0.D()[1], XZR))
T(0x9e660090, FMOV(X16, D4))
T(0x9eae025f, FMOV(XZR, V18.D()[1]))
T(0x1e204079, FMOV(S25, S3))
T(0x1e6042f8, FMOV(D24, D23))
// FImm8 forms: the trailing comment gives the value the immediate stands for.
T(0x1e32f01c, FMOV(S28, FImm8{true, 0b001, 0b0111})) // -5.75
T(0x1e74901e, FMOV(D30, FImm8{true, 0b010, 0b0100})) // -10.0
T(0x0f03f51a, FMOV(V26.S2(), FImm8{false, 0b110, 0b1000})) // 0.75
T(0x6f02f58e, FMOV(V14.D2(), FImm8{false, 0b100, 0b1100})) // 0.21875
T(0x1f0adaf5, FMSUB(S21, S23, S10, S22))
T(0x1f5da840, FMSUB(D0, D2, D29, D10))
T(0x5fa39bba, FMUL(S26, S29, V3.S()[3]))
T(0x4fb89ad2, FMUL(V18.S4(), V22.S4(), V24.S()[3]))
T(0x1e2b0a3c, FMUL(S28, S17, S11))
T(0x1e720933, FMUL(D19, D9, D18))
T(0x6e7edfa3, FMUL(V3.D2(), V29.D2(), V30.D2()))
T(0x5e32dee6, FMULX(S6, S23, S18))
T(0x0e27deec, FMULX(V12.S2(), V23.S2(), V7.S2()))
T(0x7f879a1f, FMULX(S31, S16, V7.S()[2]))
T(0x6fce9836, FMULX(V22.D2(), V1.D2(), V14.D()[1]))
T(0x1e2142cc, FNEG(S12, S22))
T(0x1e61434b, FNEG(D11, D26))
T(0x6ea0fb90, FNEG(V16.S4(), V28.S4()))
T(0x1f361be5, FNMADD(S5, S31, S22, S6))
T(0x1f7a316d, FNMADD(D13, D11, D26, D12))
T(0x1f3e9957, FNMSUB(S23, S10, S30, S6))
T(0x1f79da66, FNMSUB(D6, D19, D25, D22))
T(0x1e208ab5, FNMUL(S21, S21, S0))
T(0x1e6f89eb, FNMUL(D11, D15, D15))
T(0x5ea1da18, FRECPE(S24, S16))
T(0x0ea1d9df, FRECPE(V31.S2(), V14.S2()))
T(0x5e2dfe37, FRECPS(S23, S17, S13))
T(0x0e29fcec, FRECPS(V12.S2(), V7.S2(), V9.S2()))
T(0x5ee1f998, FRECPX(D24, D12))
T(0x1e264106, FRINTA(S6, S8))
T(0x1e664376, FRINTA(D22, D27))
T(0x6e6188a9, FRINTA(V9.D2(), V5.D2()))
T(0x1e27c216, FRINTI(S22, S16))
T(0x1e67c071, FRINTI(D17, D3))
T(0x6ea19b9d, FRINTI(V29.S4(), V28.S4()))
T(0x1e25413e, FRINTM(S30, S9))
T(0x1e6541a1, FRINTM(D1, D13))
T(0x4e619ad8, FRINTM(V24.D2(), V22.D2()))
T(0x1e244098, FRINTN(S24, S4))
T(0x1e6440b4, FRINTN(D20, D5))
T(0x4e618835, FRINTN(V21.D2(), V1.D2()))
T(0x1e24c188, FRINTP(S8, S12))
T(0x1e64c292, FRINTP(D18, D20))
T(0x0ea18a69, FRINTP(V9.S2(), V19.S2()))
T(0x1e274146, FRINTX(S6, S10))
T(0x1e674333, FRINTX(D19, D25))
T(0x6e619902, FRINTX(V2.D2(), V8.D2()))
T(0x1e25c2b2, FRINTZ(S18, S21))
T(0x1e65c008, FRINTZ(D8, D0))
T(0x0ea19918, FRINTZ(V24.S2(), V8.S2()))
T(0x7ea1dbdb, FRSQRTE(S27, S30))
T(0x6ee1d8df, FRSQRTE(V31.D2(), V6.D2()))
T(0x5ee0ff40, FRSQRTS(D0, D26, D0))
T(0x4eb6fe31, FRSQRTS(V17.S4(), V17.S4(), V22.S4()))
T(0x1e21c204, FSQRT(S4, S16))
T(0x1e61c31c, FSQRT(D28, D24))
T(0x6ea1fa1f, FSQRT(V31.S4(), V16.S4()))
T(0x1e273b28, FSUB(S8, S25, S7))
T(0x1e6139b9, FSUB(D25, D13, D1))
T(0x0eadd6b0, FSUB(V16.S2(), V21.S2(), V13.S2()))
// NOTE(review): the two INS lines below have no vectors; presumably they are
// placeholders for forms still to be covered - confirm.
// INS
// INS
// Encoding vectors, LD1 through LD4R: structure loads with one to four
// registers. Forms covered: base register only, post-index by immediate,
// post-index by register, single-lane (`List{...}[i]`) and the R variants.
T(0x0c407b24, LD1(List{V4.S2()}, X25))
T(0x4c40a891, LD1(List{V17.S4(), V18.S4()}, X4))
T(0x0c406d31, LD1(List{V17.D1(), V18.D1(), V19.D1()}, X9))
T(0x4c402b00, LD1(List{V0.S4(), V1.S4(), V2.S4(), V3.S4()}, X24))
T(0x4cdf72c8, LD1(List{V8.B16()}, X22, POST_INDEXED, 16))
T(0x0cd67504, LD1(List{V4.H4()}, X8, POST_INDEXED, X22))
T(0x0cdfaeb7, LD1(List{V23.D1(), V24.D1()}, X21, POST_INDEXED, 16))
T(0x0cd0a837, LD1(List{V23.S2(), V24.S2()}, X1, POST_INDEXED, X16))
T(0x4cdf6d36, LD1(List{V22.D2(), V23.D2(), V24.D2()}, X9, POST_INDEXED, 48))
T(0x0cdc685b, LD1(List{V27.S2(), V28.S2(), V29.S2()}, X2, POST_INDEXED, X28))
T(0x0cdf2ebc, LD1(List{V28.D1(), V29.D1(), V30.D1(), V31.D1()}, X21, POST_INDEXED, 32))
T(0x0cc0260c, LD1(List{V12.H4(), V13.H4(), V14.H4(), V15.H4()}, X16, POST_INDEXED, X0))
T(0x0d400665, LD1(List{V5.B()}[1], X19))
T(0x0d4041da, LD1(List{V26.H()}[0], X14))
T(0x0d40815b, LD1(List{V27.S()}[0], X10))
T(0x0d408755, LD1(List{V21.D()}[0], X26))
T(0x4ddf0966, LD1(List{V6.B()}[10], X11, POST_INDEXED, 1))
T(0x4dcc1951, LD1(List{V17.B()}[14], X10, POST_INDEXED, X12))
T(0x0ddf58cf, LD1(List{V15.H()}[3], X6, POST_INDEXED, 2))
T(0x0dd14a3d, LD1(List{V29.H()}[1], X17, POST_INDEXED, X17))
T(0x0ddf8072, LD1(List{V18.S()}[0], X3, POST_INDEXED, 4))
T(0x4dcb90bb, LD1(List{V27.S()}[3], X5, POST_INDEXED, X11))
T(0x4ddf8537, LD1(List{V23.D()}[1], X9, POST_INDEXED, 8))
T(0x0dcf8784, LD1(List{V4.D()}[0], X28, POST_INDEXED, X15))
T(0x0d40c0f1, LD1R(List{V17.B8()}, X7))
T(0x0ddfceac, LD1R(List{V12.D1()}, X21, POST_INDEXED, 8))
T(0x4dd5c9c2, LD1R(List{V2.S4()}, X14, POST_INDEXED, X21))
T(0x0c408bc8, LD2(List{V8.S2(), V9.S2()}, X30))
T(0x0cdf842a, LD2(List{V10.H4(), V11.H4()}, X1, POST_INDEXED, 16))
T(0x0cd58678, LD2(List{V24.H4(), V25.H4()}, X19, POST_INDEXED, X21))
T(0x0d60132f, LD2(List{V15.B(), V16.B()}[4], X25))
T(0x4d605156, LD2(List{V22.H(), V23.H()}[6], X10))
T(0x0d609293, LD2(List{V19.S(), V20.S()}[1], X20))
T(0x4d608599, LD2(List{V25.D(), V26.D()}[1], X12))
T(0x4dff0bd6, LD2(List{V22.B(), V23.B()}[10], X30, POST_INDEXED, 2))
T(0x0df90bab, LD2(List{V11.B(), V12.B()}[2], X29, POST_INDEXED, X25))
T(0x4dff42c3, LD2(List{V3.H(), V4.H()}[4], X22, POST_INDEXED, 4))
T(0x4dfa5816, LD2(List{V22.H(), V23.H()}[7], X0, POST_INDEXED, X26))
T(0x4dff9372, LD2(List{V18.S(), V19.S()}[3], X27, POST_INDEXED, 8))
T(0x4de483c0, LD2(List{V0.S(), V1.S()}[2], X30, POST_INDEXED, X4))
T(0x4dff8714, LD2(List{V20.D(), V21.D()}[1], X24, POST_INDEXED, 16))
T(0x4dfa854d, LD2(List{V13.D(), V14.D()}[1], X10, POST_INDEXED, X26))
T(0x4d60ca33, LD2R(List{V19.S4(), V20.S4()}, X17))
T(0x0dffc777, LD2R(List{V23.H4(), V24.H4()}, X27, POST_INDEXED, 4))
T(0x4de9c3cd, LD2R(List{V13.B16(), V14.B16()}, X30, POST_INDEXED, X9))
T(0x0c404032, LD3(List{V18.B8(), V19.B8(), V20.B8()}, X1))
T(0x0cdf4bc8, LD3(List{V8.S2(), V9.S2(), V10.S2()}, X30, POST_INDEXED, 24))
T(0x4ccb4960, LD3(List{V0.S4(), V1.S4(), V2.S4()}, X11, POST_INDEXED, X11))
T(0x0d40217c, LD3(List{V28.B(), V29.B(), V30.B()}[0], X11))
T(0x4d407a38, LD3(List{V24.H(), V25.H(), V26.H()}[7], X17))
T(0x4d40a119, LD3(List{V25.S(), V26.S(), V27.S()}[2], X8))
T(0x0d40a6bb, LD3(List{V27.D(), V28.D(), V29.D()}[0], X21))
T(0x4ddf2bb1, LD3(List{V17.B(), V18.B(), V19.B()}[10], X29, POST_INDEXED, 3))
T(0x4dc13519, LD3(List{V25.B(), V26.B(), V27.B()}[13], X8, POST_INDEXED, X1))
T(0x4ddf6b3f, LD3(List{V31.H(), V0.H(), V1.H()}[5], X25, POST_INDEXED, 6))
T(0x4dc16243, LD3(List{V3.H(), V4.H(), V5.H()}[4], X18, POST_INDEXED, X1))
T(0x4ddfa329, LD3(List{V9.S(), V10.S(), V11.S()}[2], X25, POST_INDEXED, 12))
T(0x4ddab328, LD3(List{V8.S(), V9.S(), V10.S()}[3], X25, POST_INDEXED, X26))
T(0x4ddfa4e4, LD3(List{V4.D(), V5.D(), V6.D()}[1], X7, POST_INDEXED, 24))
T(0x0ddba58c, LD3(List{V12.D(), V13.D(), V14.D()}[0], X12, POST_INDEXED, X27))
T(0x0d40e3b3, LD3R(List{V19.B8(), V20.B8(), V21.B8()}, X29))
T(0x0ddfe2f3, LD3R(List{V19.B8(), V20.B8(), V21.B8()}, X23, POST_INDEXED, 3))
T(0x0ddbe8e4, LD3R(List{V4.S2(), V5.S2(), V6.S2()}, X7, POST_INDEXED, X27))
T(0x4c400a69, LD4(List{V9.S4(), V10.S4(), V11.S4(), V12.S4()}, X19))
T(0x0cdf0bea, LD4(List{V10.S2(), V11.S2(), V12.S2(), V13.S2()}, SP, POST_INDEXED, 32))
T(0x4cd705ad, LD4(List{V13.H8(), V14.H8(), V15.H8(), V16.H8()}, X13, POST_INDEXED, X23))
T(0x0d603b97, LD4(List{V23.B(), V24.B(), V25.B(), V26.B()}[6], X28))
T(0x0d606941, LD4(List{V1.H(), V2.H(), V3.H(), V4.H()}[1], X10))
T(0x0d60a039, LD4(List{V25.S(), V26.S(), V27.S(), V28.S()}[0], X1))
T(0x4d60a4c5, LD4(List{V5.D(), V6.D(), V7.D(), V8.D()}[1], X6))
T(0x0dff2139, LD4(List{V25.B(), V26.B(), V27.B(), V28.B()}[0], X9, POST_INDEXED, 4))
T(0x4df32513, LD4(List{V19.B(), V20.B(), V21.B(), V22.B()}[9], X8, POST_INDEXED, X19))
T(0x0dff7b45, LD4(List{V5.H(), V6.H(), V7.H(), V8.H()}[3], X26, POST_INDEXED, 8))
T(0x0dfa6839, LD4(List{V25.H(), V26.H(), V27.H(), V28.H()}[1], X1, POST_INDEXED, X26))
T(0x4dffa176, LD4(List{V22.S(), V23.S(), V24.S(), V25.S()}[2], X11, POST_INDEXED, 16))
T(0x4de0a125, LD4(List{V5.S(), V6.S(), V7.S(), V8.S()}[2], X9, POST_INDEXED, X0))
T(0x0dffa4ab, LD4(List{V11.D(), V12.D(), V13.D(), V14.D()}[0], X5, POST_INDEXED, 32))
T(0x0dfba784, LD4(List{V4.D(), V5.D(), V6.D(), V7.D()}[0], X28, POST_INDEXED, X27))
T(0x4d60ef82, LD4R(List{V2.D2(), V3.D2(), V4.D2(), V5.D2()}, X28))
T(0x0dffef23, LD4R(List{V3.D1(), V4.D1(), V5.D1(), V6.D1()}, X25, POST_INDEXED, 32))
T(0x4df5e36a, LD4R(List{V10.B16(), V11.B16(), V12.B16(), V13.B16()}, X27, POST_INDEXED, X21))
// Encoding vectors, LDNP through LDUR: loads into scalar SIMD&FP registers.
// LDR is covered in post-indexed, pre-indexed, immediate-offset, offset-only
// and register-offset (with optional extend/shift) forms.
T(0x2c6dde58, LDNP(S24, S23, X18, -148))
T(0x6c5f8ad5, LDNP(D21, D2, X22, 504))
T(0xac793251, LDNP(Q17, Q12, X18, -224))
T(0x2cf1b345, LDP(S5, S12, X26, POST_INDEXED, -116))
T(0x6cc9489a, LDP(D26, D18, X4, POST_INDEXED, 144))
T(0xace34b69, LDP(Q9, Q18, X27, POST_INDEXED, -928))
T(0x2dca159f, LDP(S31, S5, X12, PRE_INDEXED, 80))
T(0x6df9682d, LDP(D13, D26, X1, PRE_INDEXED, -112))
T(0xadc7566f, LDP(Q15, Q21, X19, PRE_INDEXED, 224))
T(0x2d4efb01, LDP(S1, S30, X24, 116))
T(0x6d710b5a, LDP(D26, D2, X26, -240))
T(0xad74fbb0, LDP(Q16, Q30, X29, -368))
T(0x3c5b76a9, LDR(B9, X21, POST_INDEXED, -73))
T(0x7c5fd798, LDR(H24, X28, POST_INDEXED, -3))
T(0xbc4336b6, LDR(S22, X21, POST_INDEXED, 51))
T(0xfc53b4d5, LDR(D21, X6, POST_INDEXED, -197))
T(0x3cdf571d, LDR(Q29, X24, POST_INDEXED, -11))
T(0x3c5baf77, LDR(B23, X27, PRE_INDEXED, -70))
T(0x7c41bc79, LDR(H25, X3, PRE_INDEXED, 27))
T(0xbc48ecb2, LDR(S18, X5, PRE_INDEXED, 142))
T(0xfc4b1dee, LDR(D14, X15, PRE_INDEXED, 177))
T(0x3cc31c6a, LDR(Q10, X3, PRE_INDEXED, 49))
T(0x3d5a0ef6, LDR(B22, X23, 1667))
T(0x7d5d8dd7, LDR(H23, X14, 3782))
T(0xbd55d41a, LDR(S26, X0, 5588))
T(0xfd58c566, LDR(D6, X11, 12680))
T(0x3dce966e, LDR(Q14, X19, 14928))
T(0x1c8599c0, LDR(S0, -1002696))
T(0x5c8a1ca4, LDR(D4, -965740))
T(0x9cfd90fa, LDR(Q26, -19940))
T(0x3c634a12, LDR(B18, X16, W3, UXTW))
T(0x3c7368e7, LDR(B7, X7, X19, LSL, 0))
T(0x7c646a38, LDR(H24, X17, X4))
T(0xbc727bda, LDR(S26, X30, X18, LSL, 2))
T(0xfc63eb36, LDR(D22, X25, X3, SXTX))
T(0x3ce2ca06, LDR(Q6, X16, W2, SXTW))
T(0x3c4233e6, LDUR(B6, SP, 35))
T(0x7c4d52f1, LDUR(H17, X23, 213))
T(0xbc5be12f, LDUR(S15, X9, -66))
T(0xfc474197, LDUR(D23, X12, 116))
T(0x3cd703db, LDUR(Q27, X30, -144))
// Encoding vectors, MLA through SCVTF: integer multiply-accumulate, element
// and register moves, vector immediates, bitwise operations, byte/bit
// reversal, narrowing and widening arithmetic, and signed integer to
// floating-point conversion.
// NOTE(review): the bare `// MOV` and `// NOT` lines have no vectors;
// presumably they are placeholders for forms still to be covered - confirm.
T(0x2f9a0354, MLA(V20.S2(), V26.S2(), V26.S()[0]))
T(0x4e7e9643, MLA(V3.H8(), V18.H8(), V30.H8()))
T(0x2f80484e, MLS(V14.S2(), V2.S2(), V0.S()[2]))
T(0x6ebb9572, MLS(V18.S4(), V11.S4(), V27.S4()))
T(0x6e135ec1, MOV(V1.B()[9], V22.B()[11]))
T(0x4e0f1da9, MOV(V9.B()[7], W13))
T(0x5e0e045d, MOV(H29, V2.H()[3]))
T(0x0e043ca1, MOV(W1, V5.S()[0]))
T(0x4e083df7, MOV(X23, V15.D()[0]))
// MOV
T(0x0f06e58e, MOVI(V14.B8(), 204))
T(0x4f058559, MOVI(V25.H8(), 170))
T(0x0f030565, MOVI(V5.S2(), 107))
T(0x0f05c4dc, MOVI(V28.S2(), 166, MSL, 8))
T(0x2f07e47e, MOVI(D30, RepImm{0b11100011})) // 0xFFFFFF000000FFFF, assuming each bit expands to one byte
T(0x6f03e65b, MOVI(V27.D2(), RepImm{0b01110010})) // 0x00FFFFFF0000FF00 per lane, same assumption
T(0x0f9e813e, MUL(V30.S2(), V9.S2(), V30.S()[0]))
T(0x4ea59f8e, MUL(V14.S4(), V28.S4(), V5.S4()))
T(0x2e205acd, MVN(V13.B8(), V22.B8()))
T(0x2f0084e1, MVNI(V1.H4(), 7))
T(0x6f026602, MVNI(V2.S4(), 80, LSL, 24))
T(0x2f03c71a, MVNI(V26.S2(), 120, MSL, 8))
T(0x7ee0ba9e, NEG(D30, D20))
T(0x2ea0b9f7, NEG(V23.S2(), V15.S2()))
// NOT
T(0x4ef81f0f, ORN(V15.B16(), V24.B16(), V24.B16()))
T(0x4f03b4e0, ORR(V0.H8(), 103, LSL, 8))
T(0x4f043508, ORR(V8.S4(), 136, LSL, 8))
T(0x4eb21c9c, ORR(V28.B16(), V4.B16(), V18.B16()))
T(0x2e279d77, PMUL(V23.B8(), V11.B8(), V7.B8()))
T(0x4e27e299, PMULL2(V25.H8(), V20.B16(), V7.B16()))
T(0x2eab4048, RADDHN(V8.S2(), V2.D2(), V11.D2()))
T(0x6e605b7e, RBIT(V30.B16(), V27.B16()))
T(0x0e201b37, REV16(V23.B8(), V25.B8()))
T(0x6e60098a, REV32(V10.H8(), V12.H8()))
T(0x0e2009de, REV64(V30.B8(), V14.B8()))
T(0x4f218e4e, RSHRN2(V14.S4(), V18.D2(), 31))
T(0x6e7460f2, RSUBHN2(V18.H8(), V7.S4(), V20.S4()))
T(0x0e377f74, SABA(V20.B8(), V27.B8(), V23.B8()))
T(0x4ea851f6, SABAL2(V22.D2(), V15.S4(), V8.S4()))
T(0x0e777752, SABD(V18.H4(), V26.H4(), V23.H4()))
T(0x0eba7005, SABDL(V5.D2(), V0.S2(), V26.S2()))
T(0x4e2069c4, SADALP(V4.H8(), V14.B16()))
T(0x4e270017, SADDL2(V23.H8(), V0.B16(), V7.B16()))
T(0x0ea028ca, SADDLP(V10.D1(), V6.S2()))
T(0x4e703b2a, SADDLV(S10, V25.H8()))
T(0x0e6311d2, SADDW(V18.S4(), V14.S4(), V3.H4()))
T(0x1e02c782, SCVTF(S2, W28, 15))
T(0x1e42d0e2, SCVTF(D2, W7, 12))
T(0x9e02e80e, SCVTF(S14, X0, 6))
T(0x9e423dda, SCVTF(D26, X14, 49))
T(0x1e2202f3, SCVTF(S19, W23))
T(0x1e6201e7, SCVTF(D7, W15))
T(0x9e22016c, SCVTF(S12, X11))
T(0x9e620316, SCVTF(D22, X24))
T(0x5f34e509, SCVTF(S9, S8, 12))
T(0x4f5ae716, SCVTF(V22.D2(), V24.D2(), 38))
T(0x5e61d946, SCVTF(D6, D10))
T(0x4e61d86b, SCVTF(V11.D2(), V3.D2()))
// NOTE(review): the SHA1*/SHA256* lines below have no vectors; presumably
// they are placeholders for instructions still to be covered - confirm.
// SHA1C
// SHA1H
// SHA1M
// SHA1P
// SHA1SU0
// SHA1SU1
// SHA256H
// SHA256H2
// SHA256SU0
// SHA256SU1
// Encoding vectors, SHADD through SQRDMULH: halving add/subtract, shifts by
// immediate, signed min/max, widening multiply-accumulate, lane-to-register
// moves and saturating arithmetic, in scalar, vector and by-element forms.
T(0x4eb90506, SHADD(V6.S4(), V8.S4(), V25.S4()))
T(0x5f4d5767, SHL(D7, D27, 13))
T(0x4f1f542f, SHL(V15.H8(), V1.H8(), 15))
T(0x2ea13a71, SHLL(V17.D2(), V19.S2(), 32))
T(0x4f0885fd, SHRN2(V29.B16(), V15.H8(), 8))
T(0x0eb42794, SHSUB(V20.S2(), V28.S2(), V20.S2()))
T(0x7f5f54ad, SLI(D13, D5, 31))
T(0x6f09554e, SLI(V14.B16(), V10.B16(), 1))
T(0x0e316452, SMAX(V18.B8(), V2.B8(), V17.B8()))
T(0x4e66a478, SMAXP(V24.H8(), V3.H8(), V6.H8()))
T(0x0e30a9e6, SMAXV(B6, V15.B8()))
T(0x4e276e2a, SMIN(V10.B16(), V17.B16(), V7.B16()))
T(0x4e29ad73, SMINP(V19.B16(), V11.B16(), V9.B16()))
T(0x0e71aac5, SMINV(H5, V22.H4()))
T(0x4f9f2b00, SMLAL2(V0.D2(), V24.S4(), V31.S()[2]))
T(0x4e788037, SMLAL2(V23.S4(), V1.H8(), V24.H8()))
T(0x4f7362b9, SMLSL2(V25.S4(), V21.H8(), V3.H()[3]))
T(0x0e31a0d5, SMLSL(V21.H8(), V6.B8(), V17.B8()))
T(0x0e162fc3, SMOV(W3, V30.H()[5]))
T(0x4e0a2cf2, SMOV(X18, V7.H()[2]))
T(0x0f6ba85c, SMULL(V28.S4(), V2.H4(), V11.H()[6]))
T(0x4e61c2a1, SMULL2(V1.S4(), V21.H8(), V1.H8()))
T(0x5e20794c, SQABS(B12, B10))
T(0x4e607b9b, SQABS(V27.H8(), V28.H8()))
T(0x5eb50df4, SQADD(S20, S15, S21))
T(0x0e370ff4, SQADD(V20.B8(), V31.B8(), V23.B8()))
T(0x5fab3a4e, SQDMLAL(D14, S18, V11.S()[3]))
T(0x4f5b3805, SQDMLAL2(V5.S4(), V0.H8(), V11.H()[5]))
T(0x5e7f90ed, SQDMLAL(S13, H7, H31))
T(0x0ea992b2, SQDMLAL(V18.D2(), V21.S2(), V9.S2()))
T(0x5f867ba2, SQDMLSL(D2, S29, V6.S()[2]))
T(0x4f997118, SQDMLSL2(V24.D2(), V8.S4(), V25.S()[0]))
T(0x5e62b0b2, SQDMLSL(S18, H5, H2))
T(0x0e74b089, SQDMLSL(V9.S4(), V4.H4(), V20.H4()))
T(0x5f5acb3c, SQDMULH(H28, H25, V10.H()[5]))
T(0x4f7bc13d, SQDMULH(V29.H8(), V9.H8(), V11.H()[3]))
T(0x5e6ab724, SQDMULH(H4, H25, H10))
T(0x4ea6b543, SQDMULH(V3.S4(), V10.S4(), V6.S4()))
T(0x5f89b899, SQDMULL(D25, S4, V9.S()[2]))
T(0x0f53b2ee, SQDMULL(V14.S4(), V23.H4(), V3.H()[1]))
T(0x5e60d01a, SQDMULL(S26, H0, H0))
T(0x0eb4d146, SQDMULL(V6.D2(), V10.S2(), V20.S2()))
T(0x7ee07b81, SQNEG(D1, D28))
T(0x2e607a04, SQNEG(V4.H4(), V16.H4()))
T(0x5f47dac8, SQRDMULH(H8, H22, V7.H()[4]))
T(0x0f45db93, SQRDMULH(V19.H4(), V28.H4(), V5.H()[4]))
T(0x7ea3b621, SQRDMULH(S1, S17, S3))
T(0x6ea2b672, SQRDMULH(V18.S4(), V19.S4(), V2.S4()))
T(0x5e7c5ee7, SQRSHL(H7, H23, H28))
T(0x4e655e4b, SQRSHL(V11.H8(), V18.H8(), V5.H8()))
T(0x5f0c9c10, SQRSHRN(B16, H0, 4))
T(0x4f309e99, SQRSHRN2(V25.S4(), V20.D2(), 16))
T(0x7f1f8de7, SQRSHRUN(H7, S15, 1))
T(0x6f178f67, SQRSHRUN2(V7.H8(), V27.S4(), 9))
T(0x5f7977b8, SQSHL(D24, D29, 57))
T(0x4f1e75f3, SQSHL(V19.H8(), V15.H8(), 14))
T(0x5eb24f5d, SQSHL(S29, S26, S18))
T(0x4e7c4c93, SQSHL(V19.H8(), V4.H8(), V28.H8()))
T(0x7f2e66a1, SQSHLU(S1, S21, 14))
T(0x6f4c65a2, SQSHLU(V2.D2(), V13.D2(), 12))
T(0x5f3f950b, SQSHRN(S11, D8, 1))
T(0x4f329646, SQSHRN2(V6.S4(), V18.D2(), 14))
T(0x7f188469, SQSHRUN(H9, S3, 8))
T(0x6f328478, SQSHRUN2(V24.S4(), V3.D2(), 14))
T(0x5e362dae, SQSUB(B14, B13, B22))
T(0x0e3c2c86, SQSUB(V6.B8(), V4.B8(), V28.B8()))
T(0x5ea149fc, SQXTN(S28, D15))
T(0x4e214b24, SQXTN2(V4.B16(), V25.H8()))
T(0x7e61290e, SQXTUN(H14, S8))
T(0x6ea12b96, SQXTUN2(V22.S4(), V28.D2()))
T(0x4eae1673, SRHADD(V19.S4(), V19.S4(), V14.S4()))
T(0x7f794647, SRI(D7, D18, 7))
T(0x6f654787, SRI(V7.D2(), V28.D2(), 27))
T(0x5ee0549e, SRSHL(D30, D4, D0))
T(0x4eba55d2, SRSHL(V18.S4(), V14.S4(), V26.S4()))
T(0x5f712744, SRSHR(D4, D26, 15))
T(0x4f2025f5, SRSHR(V21.S4(), V15.S4(), 32))
T(0x5f7734a9, SRSRA(D9, D5, 9))
T(0x0f3a371a, SRSRA(V26.S2(), V24.S2(), 6))
T(0x5eed44ee, SSHL(D14, D7, D13))
T(0x0e704683, SSHL(V3.H4(), V20.H4(), V16.H4()))
T(0x4f2aa7c3, SSHLL2(V3.D2(), V30.S4(), 10))
T(0x5f5e058d, SSHR(D13, D12, 34))
T(0x4f730496, SSHR(V22.D2(), V4.D2(), 13))
T(0x5f5e152a, SSRA(D10, D9, 34))
T(0x0f21172b, SSRA(V11.S2(), V25.S2(), 31))
T(0x4e24220f, SSUBL2(V15.H8(), V16.B16(), V4.B16()))
T(0x4e3f32a2, SSUBW2(V2.H8(), V21.H8(), V31.B16()))
T(0x0c007a62, ST1(List{V2.S2()}, X19))
T(0x4c00adb7, ST1(List{V23.D2(), V24.D2()}, X13))
T(0x0c006b92, ST1(List{V18.S2(), V19.S2(), V20.S2()}, X28))
T(0x4c0029b8, ST1(List{V24.S4(), V25.S4(), V26.S4(), V27.S4()}, X13))
T(0x0c9f7f60, ST1(List{V0.D1()}, X27, POST_INDEXED, 8))
T(0x0c9f7ebc, ST1(List{V28.D1()}, X21, POST_INDEXED, 8))
T(0x0c9faf06, ST1(List{V6.D1(), V7.D1()}, X24, POST_INDEXED, 16))
T(0x4c93aff5, ST1(List{V21.D2(), V22.D2()}, SP, POST_INDEXED, X19))
T(0x4c9f6398, ST1(List{V24.B16(), V25.B16(), V26.B16()}, X28, POST_INDEXED, 48))
T(0x4c8162ff, ST1(List{V31.B16(), V0.B16(), V1.B16()}, X23, POST_INDEXED, X1))
T(0x0c9f23ee, ST1(List{V14.B8(), V15.B8(), V16.B8(), V17.B8()}, SP, POST_INDEXED, 32))
T(0x4c862148, ST1(List{V8.B16(), V9.B16(), V10.B16(), V11.B16()}, X10, POST_INDEXED, X6))
T(0x0d001c7a, ST1(List{V26.B()}[7], X3))
T(0x0d005b54, ST1(List{V20.H()}[3], X26))
T(0x4d009392, ST1(List{V18.S()}[3], X28))
T(0x4d008509, ST1(List{V9.D()}[1], X8))
T(0x4d9f1246, ST1(List{V6.B()}[12], X18, POST_INDEXED, 1))
T(0x0d8c17f5, ST1(List{V21.B()}[5], SP, POST_INDEXED, X12))
T(0x4d9f53ee, ST1(List{V14.H()}[6], SP, POST_INDEXED, 2))
T(0x0d8f48c4, ST1(List{V4.H()}[1], X6, POST_INDEXED, X15))
T(0x4d9f8185, ST1(List{V5.S()}[2], X12, POST_INDEXED, 4))
T(0x0d8c92bc, ST1(List{V28.S()}[1], X21, POST_INDEXED, X12))
T(0x4d9f86b3, ST1(List{V19.D()}[1], X21, POST_INDEXED, 8))
T(0x4d9c8442, ST1(List{V2.D()}[1], X2, POST_INDEXED, X28))
T(0x4c008a69, ST2(List{V9.S4(), V10.S4()}, X19))
T(0x4c9f8930, ST2(List{V16.S4(), V17.S4()}, X9, POST_INDEXED, 32))
T(0x0c9a8993, ST2(List{V19.S2(), V20.S2()}, X12, POST_INDEXED, X26))
T(0x0d2001ac, ST2(List{V12.B(), V13.B()}[0], X13))
T(0x4d20495c, ST2(List{V28.H(), V29.H()}[5], X10))
T(0x4d2093e4, ST2(List{V4.S(), V5.S()}[3], SP))
T(0x4d208482, ST2(List{V2.D(), V3.D()}[1], X4))
T(0x4dbf0e40, ST2(List{V0.B(), V1.B()}[11], X18, POST_INDEXED, 2))
T(0x0db8085f, ST2(List{V31.B(), V0.B()}[2], X2, POST_INDEXED, X24))
T(0x0dbf4a2d, ST2(List{V13.H(), V14.H()}[1], X17, POST_INDEXED, 4))
T(0x4db1417e, ST2(List{V30.H(), V31.H()}[4], X11, POST_INDEXED, X17))
T(0x0dbf81af, ST2(List{V15.S(), V16.S()}[0], X13, POST_INDEXED, 8))
T(0x0dbf831c, ST2(List{V28.S(), V29.S()}[0], X24, POST_INDEXED, 8))
T(0x0dbf846a, ST2(List{V10.D(), V11.D()}[0], X3, POST_INDEXED, 16))
T(0x0dab85dc, ST2(List{V28.D(), V29.D()}[0], X14, POST_INDEXED, X11))
T(0x0c004a09, ST3(List{V9.S2(), V10.S2(), V11.S2()}, X16))
T(0x4c9f4768, ST3(List{V8.H8(), V9.H8(), V10.H8()}, X27, POST_INDEXED, 48))
T(0x0c944918, ST3(List{V24.S2(), V25.S2(), V26.S2()}, X8, POST_INDEXED, X20))
T(0x0d003f80, ST3(List{V0.B(), V1.B(), V2.B()}[7], X28))
T(0x0d007306, ST3(List{V6.H(), V7.H(), V8.H()}[2], X24))
T(0x0d00b131, ST3(List{V17.S(), V18.S(), V19.S()}[1], X9))
T(0x4d00a5f8, ST3(List{V24.D(), V25.D(), V26.D()}[1], X15))
T(0x0d9f27c1, ST3(List{V1.B(), V2.B(), V3.B()}[1], X30, POST_INDEXED, 3))
T(0x4d992bb2, ST3(List{V18.B(), V19.B(), V20.B()}[10], X29, POST_INDEXED, X25))
T(0x0d9f785d, ST3(List{V29.H(), V30.H(), V31.H()}[3], X2, POST_INDEXED, 6))
T(0x4d8b726b, ST3(List{V11.H(), V12.H(), V13.H()}[6], X19, POST_INDEXED, X11))
T(0x4d9fa342, ST3(List{V2.S(), V3.S(), V4.S()}[2], X26, POST_INDEXED, 12))
T(0x4d80b206, ST3(List{V6.S(), V7.S(), V8.S()}[3], X16, POST_INDEXED, X0))
T(0x4d9fa5de, ST3(List{V30.D(), V31.D(), V0.D()}[1], X14, POST_INDEXED, 24))
T(0x4d8ba6d7, ST3(List{V23.D(), V24.D(), V25.D()}[1], X22, POST_INDEXED, X11))
T(0x0c00034f, ST4(List{V15.B8(), V16.B8(), V17.B8(), V18.B8()}, X26))
T(0x4c9f038c, ST4(List{V12.B16(), V13.B16(), V14.B16(), V15.B16()}, X28, POST_INDEXED, 64))
T(0x4c800719, ST4(List{V25.H8(), V26.H8(), V27.H8(), V28.H8()}, X24, POST_INDEXED, X0))
T(0x0d2021a8, ST4(List{V8.B(), V9.B(), V10.B(), V11.B()}[0], X13))
T(0x4d2062cd, ST4(List{V13.H(), V14.H(), V15.H(), V16.H()}[4], X22))
T(0x0d20b146, ST4(List{V6.S(), V7.S(), V8.S(), V9.S()}[1], X10))
T(0x4d20a6f5, ST4(List{V21.D(), V22.D(), V23.D(), V24.D()}[1], X23))
T(0x0dbf2d56, ST4(List{V22.B(), V23.B(), V24.B(), V25.B()}[3], X10, POST_INDEXED, 4))
T(0x4da631df, ST4(List{V31.B(), V0.B(), V1.B(), V2.B()}[12], X14, POST_INDEXED, X6))
T(0x0dbf7a76, ST4(List{V22.H(), V23.H(), V24.H(), V25.H()}[3], X19, POST_INDEXED, 8))
T(0x0dbb698e, ST4(List{V14.H(), V15.H(), V16.H(), V17.H()}[1], X12, POST_INDEXED, X27))
T(0x4dbfb37f, ST4(List{V31.S(), V0.S(), V1.S(), V2.S()}[3], X27, POST_INDEXED, 16))
T(0x4dadb3d1, ST4(List{V17.S(), V18.S(), V19.S(), V20.S()}[3], X30, POST_INDEXED, X13))
T(0x4dbfa5b3, ST4(List{V19.D(), V20.D(), V21.D(), V22.D()}[1], X13, POST_INDEXED, 32))
T(0x4db5a7cf, ST4(List{V15.D(), V16.D(), V17.D(), V18.D()}[1], X30, POST_INDEXED, X21))
T(0x2c29149a, STNP(S26, S5, X4, -184))
T(0x6c229316, STNP(D22, D4, X24, -472))
T(0xac3bc3c8, STNP(Q8, Q16, X30, -144))
T(0x2cacdf66, STP(S6, S23, X27, POST_INDEXED, -156))
T(0x6c826f4f, STP(D15, D27, X26, POST_INDEXED, 32))
T(0xac97955a, STP(Q26, Q5, X10, POST_INDEXED, 752))
T(0x2da7ba37, STP(S23, S14, X17, PRE_INDEXED, -196))
T(0x6d8bcbce, STP(D14, D18, X30, PRE_INDEXED, 184))
T(0xad8b4ba6, STP(Q6, Q18, X29, PRE_INDEXED, 352))
T(0x2d1f7434, STP(S20, S29, X1, 248))
T(0x6d3bb5d8, STP(D24, D13, X14, -72))
T(0xad09088a, STP(Q10, Q2, X4, 288))
T(0x3c066467, STR(B7, X3, POST_INDEXED, 102))
T(0x7c070723, STR(H3, X25, POST_INDEXED, 112))
T(0xbc13175a, STR(S26, X26, POST_INDEXED, -207))
T(0xfc1be536, STR(D22, X9, POST_INDEXED, -66))
T(0x3c99b56b, STR(Q11, X11, POST_INDEXED, -101))
T(0x3c002d49, STR(B9, X10, PRE_INDEXED, 2))
T(0x7c158e09, STR(H9, X16, PRE_INDEXED, -168))
T(0xbc06bc8d, STR(S13, X4, PRE_INDEXED, 107))
T(0xfc080eae, STR(D14, X21, PRE_INDEXED, 128))
T(0x3c8e7ed9, STR(Q25, X22, PRE_INDEXED, 231))
T(0x3d275492, STR(B18, X4, 2517))
T(0x7d0b4265, STR(H5, X19, 1440))
T(0xbd0d2595, STR(S21, X12, 3364))
T(0xfd237a73, STR(D19, X19, 18160))
T(0x3db4a5f5, STR(Q21, X15, 53904))
T(0x3c3e693c, STR(B28, X9, X30, LSL, 0))
T(0x3c3b6ac5, STR(B5, X22, X27, LSL, 0))
T(0x7c36faf0, STR(H16, X23, X22, SXTX, 1))
T(0xbc27f838, STR(S24, X1, X7, SXTX, 2))
T(0xfc29db51, STR(D17, X26, W9, SXTW, 3))
T(0x3cbfea8f, STR(Q15, X20, XZR, SXTX))
T(0x3c0441c8, STUR(B8, X14, 68))
T(0x7c00b0d7, STUR(H23, X6, 11))
T(0xbc0d117d, STUR(S29, X11, 209))
T(0xfc1f03c0, STUR(D0, X30, -16))
T(0x3c9753f0, STUR(Q16, SP, -139))
T(0x7eeb84f9, SUB(D25, D7, D11))
T(0x6e708714, SUB(V20.H8(), V24.H8(), V16.H8()))
T(0x4e766323, SUBHN2(V3.H8(), V25.S4(), V22.S4()))
T(0x5e203935, SUQADD(B21, B9))
T(0x4e203b33, SUQADD(V19.B16(), V25.B16()))
// SXTL
T(0x0e0c20db, TBL(V27.B8(), List{V6.B16(), V7.B16()}, V12.B8()))
T(0x4e1d43ab, TBL(V11.B16(), List{V29.B16(), V30.B16(), V31.B16()}, V29.B16()))
T(0x0e07634f, TBL(V15.B8(), List{V26.B16(), V27.B16(), V28.B16(), V29.B16()}, V7.B8()))
T(0x0e0603b9, TBL(V25.B8(), List{V29.B16()}, V6.B8()))
T(0x0e05317a, TBX(V26.B8(), List{V11.B16(), V12.B16()}, V5.B8()))
T(0x4e0150ca, TBX(V10.B16(), List{V6.B16(), V7.B16(), V8.B16()}, V1.B16()))
T(0x4e0e7190, TBX(V16.B16(), List{V12.B16(), V13.B16(), V14.B16(), V15.B16()}, V14.B16()))
T(0x4e1b1333, TBX(V19.B16(), List{V25.B16()}, V27.B16()))
T(0x4e0829e3, TRN1(V3.B16(), V15.B16(), V8.B16()))
T(0x4ecc6b24, TRN2(V4.D2(), V25.D2(), V12.D2()))
T(0x2e697f5d, UABA(V29.H4(), V26.H4(), V9.H4()))
T(0x2e36519e, UABAL(V30.H8(), V12.B8(), V22.B8()))
T(0x6e6975e0, UABD(V0.H8(), V15.H8(), V9.H8()))
T(0x2e2e718a, UABDL(V10.H8(), V12.B8(), V14.B8()))
T(0x6ea069b1, UADALP(V17.D2(), V13.S4()))
T(0x2e6d0349, UADDL(V9.S4(), V26.H4(), V13.H4()))
T(0x6e602bfc, UADDLP(V28.S4(), V31.H8()))
T(0x6e703b6d, UADDLV(S13, V27.H8()))
T(0x2e781352, UADDW(V18.S4(), V26.S4(), V24.H4()))
T(0x1e03ec95, UCVTF(S21, W4, 5))
T(0x1e43fd36, UCVTF(D22, W9, 1))
T(0x9e03a27b, UCVTF(S27, X19, 24))
T(0x9e43e9c4, UCVTF(D4, X14, 6))
T(0x1e230096, UCVTF(S22, W4))
T(0x1e630076, UCVTF(D22, W3))
T(0x9e2302c8, UCVTF(S8, X22))
T(0x9e6302cd, UCVTF(D13, X22))
T(0x7f2ce5a2, UCVTF(S2, S13, 20))
T(0x6f4be788, UCVTF(V8.D2(), V28.D2(), 53))
T(0x7e21d87f, UCVTF(S31, S3))
T(0x2e21da7d, UCVTF(V29.S2(), V19.S2()))
T(0x2e7b0674, UHADD(V20.H4(), V19.H4(), V27.H4()))
T(0x6ea9277f, UHSUB(V31.S4(), V27.S4(), V9.S4()))
T(0x6e7a6658, UMAX(V24.H8(), V18.H8(), V26.H8()))
T(0x2e23a513, UMAXP(V19.B8(), V8.B8(), V3.B8()))
T(0x2e70a9b5, UMAXV(H21, V13.H4()))
T(0x6e7d6ef2, UMIN(V18.H8(), V23.H8(), V29.H8()))
T(0x2e6eae4e, UMINP(V14.H4(), V18.H4(), V14.H4()))
T(0x2e71abe6, UMINV(H6, V31.H4()))
T(0x6fb820fa, UMLAL2(V26.D2(), V7.S4(), V24.S()[1]))
T(0x6ebc83ab, UMLAL2(V11.D2(), V29.S4(), V28.S4()))
T(0x2f5c61cf, UMLSL(V15.S4(), V14.H4(), V12.H()[1]))
T(0x6e6aa2e2, UMLSL2(V2.S4(), V23.H8(), V10.H8()))
T(0x0e0f3fb8, UMOV(W24, V29.B()[7]))
// UMOV
T(0x6f62a05c, UMULL2(V28.S4(), V2.H8(), V2.H()[2]))
T(0x6e6cc3b0, UMULL2(V16.S4(), V29.H8(), V12.H8()))
T(0x7ea40f68, UQADD(S8, S27, S4))
T(0x6eac0e8f, UQADD(V15.S4(), V20.S4(), V12.S4()))
T(0x7e2a5df5, UQRSHL(B21, B15, B10))
T(0x6ef55fc9, UQRSHL(V9.D2(), V30.D2(), V21.D2()))
T(0x7f0b9db4, UQRSHRN(B20, H13, 5))
T(0x2f159d7d, UQRSHRN(V29.H4(), V11.S4(), 11))
T(0x7f6c755c, UQSHL(D28, D10, 44))
T(0x6f6175ec, UQSHL(V12.D2(), V15.D2(), 33))
T(0x7eef4ff4, UQSHL(D20, D31, D15))
T(0x6e3d4f2e, UQSHL(V14.B16(), V25.B16(), V29.B16()))
T(0x7f1f94d2, UQSHRN(H18, S6, 1))
T(0x6f3397e4, UQSHRN2(V4.S4(), V31.D2(), 13))
T(0x7ee12cad, UQSUB(D13, D5, D1))
T(0x2e712ff3, UQSUB(V19.H4(), V31.H4(), V17.H4()))
T(0x7e614b06, UQXTN(H6, S24))
T(0x6e2149ec, UQXTN2(V12.B16(), V15.H8()))
T(0x0ea1c849, URECPE(V9.S2(), V2.S2()))
T(0x6eb51740, URHADD(V0.S4(), V26.S4(), V21.S4()))
T(0x7eeb57f8, URSHL(D24, D31, D11))
T(0x6e335531, URSHL(V17.B16(), V9.B16(), V19.B16()))
T(0x7f65253d, URSHR(D29, D9, 27))
T(0x2f102566, URSHR(V6.H4(), V11.H4(), 16))
T(0x2ea1cb59, URSQRTE(V25.S2(), V26.S2()))
T(0x7f54345f, URSRA(D31, D2, 44))
T(0x2f1b345f, URSRA(V31.H4(), V2.H4(), 5))
T(0x7ef94448, USHL(D8, D2, D25))
T(0x6ea14621, USHL(V1.S4(), V17.S4(), V1.S4()))
T(0x2f33a5a1, USHLL(V1.D2(), V13.S2(), 19))
T(0x7f5405d0, USHR(D16, D14, 44))
T(0x6f450505, USHR(V5.D2(), V8.D2(), 59))
T(0x7ea038c1, USQADD(S1, S6))
T(0x2e203b60, USQADD(V0.B8(), V27.B8()))
T(0x7f4616d2, USRA(D18, D22, 58))
T(0x2f1a1713, USRA(V19.H4(), V24.H4(), 6))
T(0x2e3f226e, USUBL(V14.H8(), V19.B8(), V31.B8()))
T(0x6e7a33a0, USUBW2(V0.S4(), V29.S4(), V26.H8()))
// UXTL
T(0x4e1b1a1f, UZP1(V31.B16(), V16.B16(), V27.B16()))
T(0x4ecc597b, UZP2(V27.D2(), V11.D2(), V12.D2()))
T(0x0e212af7, XTN(V23.B8(), V23.H8()))
T(0x4e853928, ZIP1(V8.S4(), V9.S4(), V5.S4()))
T(0x0e977a78, ZIP2(V24.S2(), V19.S2(), V23.S2()))

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <random>
#include <type_traits>
// Returns a random integer drawn uniformly from the closed range [min, max].
//
// T must be an integer type that std::uniform_int_distribution is defined for: the standard
// only permits short, int, long, long long and their unsigned counterparts. Character types
// and bool satisfy std::is_integral but are undefined behavior as the distribution's IntType,
// so they are rejected at compile time instead of silently compiling.
//
// The engine is a function-local static std::mt19937 seeded once from std::random_device.
// Calls mutate that shared engine without any locking.
//
// Precondition (inherited from std::uniform_int_distribution): min <= max.
template<typename T>
T RandInt(T min, T max)
{
    static_assert(std::is_integral_v<T>, "T must be an integral type.");
    static_assert(!std::is_same_v<T, signed char> && !std::is_same_v<T, unsigned char>,
                  "Using char with uniform_int_distribution is undefined behavior.");
    // Plain char is a distinct type from signed/unsigned char, and bool / wide character
    // types are integral too; none of them are valid IntType arguments either.
    static_assert(!std::is_same_v<T, char> && !std::is_same_v<T, bool>
                      && !std::is_same_v<T, wchar_t>
                      && !std::is_same_v<T, char16_t> && !std::is_same_v<T, char32_t>,
                  "Using bool or a character type with uniform_int_distribution is undefined behavior.");

    static std::random_device rd;
    static std::mt19937 mt(rd());
    std::uniform_int_distribution<T> rand(min, max);
    return rand(mt);
}

View file

@ -235,8 +235,9 @@ void A32EmitX64::GenTerminalHandlers() {
calculate_location_descriptor(); calculate_location_descriptor();
code.L(rsb_cache_miss); code.L(rsb_cache_miss);
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
code.mov(rbp, rbx);
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(ebp, r12d); code.crc32(rbp, r12);
} }
code.and_(ebp, fast_dispatch_table_mask); code.and_(ebp, fast_dispatch_table_mask);
code.lea(rbp, ptr[r12 + rbp]); code.lea(rbp, ptr[r12 + rbp]);
@ -254,11 +255,12 @@ void A32EmitX64::GenTerminalHandlers() {
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>(); fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32()); code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
} }
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask); code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]); code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
code.ret(); code.ret();
PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a32_fast_dispatch_table_lookup");
} }
} }

View file

@ -101,6 +101,10 @@ struct Jit::Impl {
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr)); Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
} }
// Clears the bits of `hr` from jit_state.halt_reason by AND-ing in the complement of the
// reason mask through Atomic::And. Bits belonging to other halt reasons are left as they are.
// This undoes the Atomic::Or that is applied to the same field just above.
void ClearHalt(HaltReason hr) {
Atomic::And(&jit_state.halt_reason, ~static_cast<u32>(hr));
}
void ClearExclusiveState() { void ClearExclusiveState() {
jit_state.exclusive_state = 0; jit_state.exclusive_state = 0;
} }
@ -232,6 +236,10 @@ void Jit::HaltExecution(HaltReason hr) {
impl->HaltExecution(hr); impl->HaltExecution(hr);
} }
// Public entry point for clearing a halt reason; forwards `hr` unchanged to the
// implementation object's ClearHalt.
void Jit::ClearHalt(HaltReason hr) {
impl->ClearHalt(hr);
}
void Jit::ClearExclusiveState() { void Jit::ClearExclusiveState() {
impl->ClearExclusiveState(); impl->ClearExclusiveState();
} }

View file

@ -193,8 +193,9 @@ void A64EmitX64::GenTerminalHandlers() {
calculate_location_descriptor(); calculate_location_descriptor();
code.L(rsb_cache_miss); code.L(rsb_cache_miss);
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
code.mov(rbp, rbx);
if (code.HasHostFeature(HostFeature::SSE42)) { if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(rbx, r12d); code.crc32(rbp, r12);
} }
code.and_(ebp, fast_dispatch_table_mask); code.and_(ebp, fast_dispatch_table_mask);
code.lea(rbp, ptr[r12 + rbp]); code.lea(rbp, ptr[r12 + rbp]);
@ -215,7 +216,7 @@ void A64EmitX64::GenTerminalHandlers() {
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2); code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
} }
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask); code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]); code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM2 + code.ABI_PARAM1]);
code.ret(); code.ret();
PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a64_fast_dispatch_table_lookup"); PerfMapRegister(fast_dispatch_table_lookup, code.getCurr(), "a64_fast_dispatch_table_lookup");
} }

View file

@ -134,6 +134,10 @@ public:
Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr)); Atomic::Or(&jit_state.halt_reason, static_cast<u32>(hr));
} }
// Clears the bits of `hr` from jit_state.halt_reason by AND-ing in the complement of the
// reason mask through Atomic::And. Bits belonging to other halt reasons are left as they are.
// This undoes the Atomic::Or that is applied to the same field just above.
void ClearHalt(HaltReason hr) {
Atomic::And(&jit_state.halt_reason, ~static_cast<u32>(hr));
}
u64 GetSP() const { u64 GetSP() const {
return jit_state.sp; return jit_state.sp;
} }
@ -351,6 +355,10 @@ void Jit::HaltExecution(HaltReason hr) {
impl->HaltExecution(hr); impl->HaltExecution(hr);
} }
// Public entry point for clearing a halt reason; forwards `hr` unchanged to the
// implementation object's ClearHalt.
void Jit::ClearHalt(HaltReason hr) {
impl->ClearHalt(hr);
}
u64 Jit::GetSP() const { u64 Jit::GetSP() const {
return impl->GetSP(); return impl->GetSP();
} }

View file

@ -402,7 +402,7 @@ void BlockOfCode::LookupBlock() {
cb.LookupBlock->EmitCall(*this); cb.LookupBlock->EmitCall(*this);
} }
Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) { Xbyak::Address BlockOfCode::XmmConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
return constant_pool.GetConstant(frame, lower, upper); return constant_pool.GetConstant(frame, lower, upper);
} }

View file

@ -21,6 +21,7 @@
#include "dynarmic/backend/x64/jitstate_info.h" #include "dynarmic/backend/x64/jitstate_info.h"
#include "dynarmic/common/cast_util.h" #include "dynarmic/common/cast_util.h"
#include "dynarmic/interface/halt_reason.h" #include "dynarmic/interface/halt_reason.h"
#include "mcl/bit/bit_field.hpp"
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
@ -116,7 +117,13 @@ public:
} }
} }
Xbyak::Address MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0); Xbyak::Address XmmConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper);
// "Broadcast" variant of XmmConst: builds one 64-bit pattern with
// mcl::bit::replicate_element<u64>(esize, value) and passes it as both the lower and the
// upper half of the constant.
// NOTE(review): presumably the pattern is `value` repeated in every esize-bit element of the
// 64-bit word -- confirm against mcl's replicate_element.
template<size_t esize>
Xbyak::Address XmmBConst(const Xbyak::AddressFrame& frame, u64 value) {
    const u64 element_pattern = mcl::bit::replicate_element<u64>(esize, value);
    return XmmConst(frame, element_pattern, element_pattern);
}
/// Far code sits far away from the near code. Execution remains primarily in near code. /// Far code sits far away from the near code. Execution remains primarily in near code.
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary. /// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.

View file

@ -14,22 +14,22 @@
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
ConstantPool::ConstantPool(BlockOfCode& code, size_t size) ConstantPool::ConstantPool(BlockOfCode& code, size_t size)
: code(code), pool_size(size) { : code(code), insertion_point(0) {
code.int3(); code.int3();
code.align(align_size); code.align(align_size);
pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size)); pool = std::span<ConstantT>(
current_pool_ptr = pool_begin; reinterpret_cast<ConstantT*>(code.AllocateFromCodeSpace(size)), size / align_size);
} }
Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) { Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
const auto constant = std::make_pair(lower, upper); const auto constant = ConstantT(lower, upper);
auto iter = constant_info.find(constant); auto iter = constant_info.find(constant);
if (iter == constant_info.end()) { if (iter == constant_info.end()) {
ASSERT(static_cast<size_t>(current_pool_ptr - pool_begin) < pool_size); ASSERT(insertion_point < pool.size());
std::memcpy(current_pool_ptr, &lower, sizeof(u64)); ConstantT& target_constant = pool[insertion_point];
std::memcpy(current_pool_ptr + sizeof(u64), &upper, sizeof(u64)); target_constant = constant;
iter = constant_info.emplace(constant, current_pool_ptr).first; iter = constant_info.emplace(constant, &target_constant).first;
current_pool_ptr += align_size; ++insertion_point;
} }
return frame[code.rip + iter->second]; return frame[code.rip + iter->second];
} }

View file

@ -6,6 +6,8 @@
#pragma once #pragma once
#include <bit> #include <bit>
#include <cstddef>
#include <span>
#include <utility> #include <utility>
#include <mcl/stdint.hpp> #include <mcl/stdint.hpp>
@ -29,18 +31,20 @@ public:
private: private:
static constexpr size_t align_size = 16; // bytes static constexpr size_t align_size = 16; // bytes
using ConstantT = std::pair<u64, u64>;
static_assert(sizeof(ConstantT) == align_size);
struct ConstantHash { struct ConstantHash {
std::size_t operator()(const std::pair<u64, u64>& constant) const noexcept { std::size_t operator()(const ConstantT& constant) const noexcept {
return constant.first ^ std::rotl<u64>(constant.second, 1); return constant.first ^ std::rotl<u64>(constant.second, 1);
} }
}; };
tsl::robin_map<std::pair<u64, u64>, void*, ConstantHash> constant_info; tsl::robin_map<ConstantT, void*, ConstantHash> constant_info;
BlockOfCode& code; BlockOfCode& code;
size_t pool_size; std::span<ConstantT> pool;
u8* pool_begin; std::size_t insertion_point;
u8* current_pool_ptr;
}; };
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -325,7 +325,9 @@ void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_co
} }
void EmitX64::Unpatch(const IR::LocationDescriptor& target_desc) { void EmitX64::Unpatch(const IR::LocationDescriptor& target_desc) {
if (patch_information.count(target_desc)) {
Patch(target_desc, nullptr); Patch(target_desc, nullptr);
}
} }
void EmitX64::ClearCache() { void EmitX64::ClearCache() {
@ -345,9 +347,8 @@ void EmitX64::InvalidateBasicBlocks(const tsl::robin_set<IR::LocationDescriptor>
continue; continue;
} }
if (patch_information.count(descriptor)) {
Unpatch(descriptor); Unpatch(descriptor);
}
block_descriptors.erase(it); block_descriptors.erase(it);
} }
} }

View file

@ -42,7 +42,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641)); code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
code.movzx(value.cvt32(), value.changeBit(data_size)); code.movzx(value.cvt32(), value.changeBit(data_size));
code.xor_(value.cvt32(), crc); code.xor_(value.cvt32(), crc);
@ -72,7 +72,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641)); code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
code.xor_(crc, value); code.xor_(crc, value);
code.shl(crc.cvt64(), 32); code.shl(crc.cvt64(), 32);
@ -93,7 +93,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
code.movdqa(xmm_const, code.MConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641)); code.movdqa(xmm_const, code.XmmConst(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
code.mov(crc, crc); code.mov(crc, crc);
code.xor_(crc.cvt64(), value); code.xor_(crc.cvt64(), value);

View file

@ -90,10 +90,9 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
FpFixup::Norm_Src, FpFixup::Norm_Src,
FpFixup::Norm_Src, FpFixup::Norm_Src,
FpFixup::Norm_Src); FpFixup::Norm_Src);
constexpr u64 denormal_to_zero64 = mcl::bit::replicate_element<fsize, u64>(denormal_to_zero);
const Xbyak::Xmm tmp = xmm16; const Xbyak::Xmm tmp = xmm16;
FCODE(vmovap)(tmp, code.MConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64))); FCODE(vmovap)(tmp, code.XmmBConst<fsize>(xword, denormal_to_zero));
for (const Xbyak::Xmm& xmm : to_daz) { for (const Xbyak::Xmm& xmm : to_daz) {
FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
@ -102,17 +101,17 @@ void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list
} }
for (const Xbyak::Xmm& xmm : to_daz) { for (const Xbyak::Xmm& xmm : to_daz) {
code.movaps(xmm0, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask)); code.movaps(xmm0, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
code.andps(xmm0, xmm); code.andps(xmm0, xmm);
if constexpr (fsize == 32) { if constexpr (fsize == 32) {
code.pcmpgtd(xmm0, code.MConst(xword, f32_smallest_normal - 1)); code.pcmpgtd(xmm0, code.XmmBConst<32>(xword, f32_smallest_normal - 1));
} else if (code.HasHostFeature(HostFeature::SSE42)) { } else if (code.HasHostFeature(HostFeature::SSE42)) {
code.pcmpgtq(xmm0, code.MConst(xword, f64_smallest_normal - 1)); code.pcmpgtq(xmm0, code.XmmBConst<64>(xword, f64_smallest_normal - 1));
} else { } else {
code.pcmpgtd(xmm0, code.MConst(xword, f64_smallest_normal - 1)); code.pcmpgtd(xmm0, code.XmmBConst<64>(xword, f64_smallest_normal - 1));
code.pshufd(xmm0, xmm0, 0b11100101); code.pshufd(xmm0, xmm0, 0b11100101);
} }
code.orps(xmm0, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero)); code.orps(xmm0, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero));
code.andps(xmm, xmm0); code.andps(xmm, xmm0);
} }
} }
@ -123,7 +122,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch)
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
FpFixup::PosZero); FpFixup::PosZero);
FCODE(vfixupimms)(xmm_value, xmm_value, code.MConst(ptr, u64(nan_to_zero)), u8(0)); FCODE(vfixupimms)(xmm_value, xmm_value, code.XmmBConst<32>(ptr, nan_to_zero), u8(0));
} else if (code.HasHostFeature(HostFeature::AVX)) { } else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value); FCODE(vcmpords)(xmm_scratch, xmm_value, xmm_value);
FCODE(vandp)(xmm_value, xmm_value, xmm_scratch); FCODE(vandp)(xmm_value, xmm_value, xmm_scratch);
@ -139,15 +138,15 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
const Xbyak::Opmask nan_mask = k1; const Xbyak::Opmask nan_mask = k1;
FCODE(vfpclasss)(nan_mask, result, u8(FpClass::QNaN | FpClass::SNaN)); FCODE(vfpclasss)(nan_mask, result, u8(FpClass::QNaN | FpClass::SNaN));
FCODE(vblendmp)(result | nan_mask, result, code.MConst(ptr_b, fsize == 32 ? f32_nan : f64_nan)); FCODE(vblendmp)(result | nan_mask, result, code.XmmBConst<fsize>(ptr_b, fsize == 32 ? f32_nan : f64_nan));
} else if (code.HasHostFeature(HostFeature::AVX)) { } else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpunords)(xmm0, result, result); FCODE(vcmpunords)(xmm0, result, result);
FCODE(blendvp)(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); FCODE(blendvp)(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
} else { } else {
Xbyak::Label end; Xbyak::Label end;
FCODE(ucomis)(result, result); FCODE(ucomis)(result, result);
code.jnp(end); code.jnp(end);
code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); code.movaps(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
code.L(end); code.L(end);
} }
} }
@ -161,7 +160,7 @@ Xbyak::Label ProcessNaN(BlockOfCode& code, Xbyak::Xmm a) {
code.SwitchToFarCode(); code.SwitchToFarCode();
code.L(nan); code.L(nan);
code.orps(a, code.MConst(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000)); code.orps(a, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00400000 : 0x0008'0000'0000'0000));
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
code.SwitchToNearCode(); code.SwitchToNearCode();
@ -257,10 +256,10 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
// Silence the SNaN as required by spec. // Silence the SNaN as required by spec.
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vorps(result, op2, code.MConst(xword, mantissa_msb)); code.vorps(result, op2, code.XmmBConst<fsize>(xword, mantissa_msb));
} else { } else {
code.movaps(result, op2); code.movaps(result, op2);
code.orps(result, code.MConst(xword, mantissa_msb)); code.orps(result, code.XmmBConst<fsize>(xword, mantissa_msb));
} }
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
} }
@ -339,7 +338,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
FCODE(ucomis)(op1, op2); FCODE(ucomis)(op1, op2);
code.jp(op_are_nans); code.jp(op_are_nans);
// Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN! // Here we must return a positive NaN, because the indefinite value on x86 is a negative NaN!
code.movaps(result, code.MConst(xword, FP::FPInfo<FPT>::DefaultNaN())); code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
code.L(op_are_nans); code.L(op_are_nans);
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end); EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
@ -357,7 +356,7 @@ void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, non_sign_mask); const Xbyak::Address mask = code.XmmBConst<fsize>(xword, non_sign_mask);
code.andps(result, mask); code.andps(result, mask);
@ -383,7 +382,7 @@ void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, u64(sign_mask)); const Xbyak::Address mask = code.XmmBConst<fsize>(xword, u64(sign_mask));
code.xorps(result, mask); code.xorps(result, mask);
@ -453,7 +452,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.L(nan); code.L(nan);
if (ctx.FPCR().DN()) { if (ctx.FPCR().DN()) {
code.movaps(result, code.MConst(xword, fsize == 32 ? f32_nan : f64_nan)); code.movaps(result, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_nan : f64_nan));
code.jmp(end); code.jmp(end);
} else { } else {
code.movaps(tmp, result); code.movaps(tmp, result);
@ -487,7 +486,7 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
if (ctx.FPCR().DN()) { if (ctx.FPCR().DN()) {
FCODE(vcmps)(k1, op2, op2, Cmp::Unordered_Q); FCODE(vcmps)(k1, op2, op2, Cmp::Unordered_Q);
FCODE(vmovs)(op2 | k1, code.MConst(xword, default_nan)); FCODE(vmovs)(op2 | k1, code.XmmBConst<fsize>(xword, default_nan));
} }
} else { } else {
Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr(); Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
@ -542,12 +541,12 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
code.jc(maybe_both_nan); code.jc(maybe_both_nan);
if (ctx.FPCR().DN()) { if (ctx.FPCR().DN()) {
code.L(snan); code.L(snan);
code.movaps(op2, code.MConst(xword, default_nan)); code.movaps(op2, code.XmmBConst<fsize>(xword, default_nan));
code.jmp(end); code.jmp(end);
} else { } else {
code.movaps(op2, op1); code.movaps(op2, op1);
code.L(snan); code.L(snan);
code.orps(op2, code.MConst(xword, FP::FPInfo<FPT>::mantissa_msb)); code.orps(op2, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::mantissa_msb));
code.jmp(end); code.jmp(end);
} }
@ -648,9 +647,9 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.movaps(result, operand1); code.movaps(result, operand1);
FCODE(vfmadd231s)(result, operand2, operand3); FCODE(vfmadd231s)(result, operand2, operand3);
code.movaps(tmp, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask)); code.movaps(tmp, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
code.andps(tmp, result); code.andps(tmp, result);
FCODE(ucomis)(tmp, code.MConst(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal)); FCODE(ucomis)(tmp, code.XmmBConst<fsize>(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal));
code.jz(fallback, code.T_NEAR); code.jz(fallback, code.T_NEAR);
code.L(end); code.L(end);
@ -758,12 +757,12 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.movaps(result, op1); code.movaps(result, op1);
code.xorps(result, op2); code.xorps(result, op2);
} }
code.andps(result, code.MConst(xword, FP::FPInfo<FPT>::sign_mask)); code.andps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::sign_mask));
code.orps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>())); code.orps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
code.L(op_are_nans); code.L(op_are_nans);
if (do_default_nan) { if (do_default_nan) {
code.movaps(result, code.MConst(xword, FP::FPInfo<FPT>::DefaultNaN())); code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPInfo<FPT>::DefaultNaN()));
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
} else { } else {
EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end); EmitPostProcessNaNs<fsize>(code, result, op1, op2, tmp, end);
@ -864,7 +863,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>())); code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
FCODE(vfnmadd231s)(result, operand1, operand2); FCODE(vfnmadd231s)(result, operand1, operand2);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -878,7 +877,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>())); code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
FCODE(vfnmadd231s)(result, operand1, operand2); FCODE(vfnmadd231s)(result, operand1, operand2);
FCODE(ucomis)(result, result); FCODE(ucomis)(result, result);
code.jp(fallback, code.T_NEAR); code.jp(fallback, code.T_NEAR);
@ -910,7 +909,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>())); code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 2>()));
FCODE(muls)(operand1, operand2); FCODE(muls)(operand1, operand2);
FCODE(subs)(result, operand1); FCODE(subs)(result, operand1);
@ -1040,19 +1039,19 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
code.movaps(value, operand); code.movaps(value, operand);
code.movaps(xmm0, code.MConst(xword, fsize == 32 ? 0xFFFF8000 : 0xFFFF'F000'0000'0000)); code.movaps(xmm0, code.XmmBConst<fsize>(xword, fsize == 32 ? 0xFFFF8000 : 0xFFFF'F000'0000'0000));
code.pand(value, xmm0); code.pand(value, xmm0);
code.por(value, code.MConst(xword, fsize == 32 ? 0x00008000 : 0x0000'1000'0000'0000)); code.por(value, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00008000 : 0x0000'1000'0000'0000));
// Detect NaNs, negatives, zeros, denormals and infinities // Detect NaNs, negatives, zeros, denormals and infinities
FCODE(ucomis)(value, code.MConst(xword, FPT(1) << FP::FPInfo<FPT>::explicit_mantissa_width)); FCODE(ucomis)(value, code.XmmBConst<fsize>(xword, FPT(1) << FP::FPInfo<FPT>::explicit_mantissa_width));
code.jna(bad_values, code.T_NEAR); code.jna(bad_values, code.T_NEAR);
FCODE(sqrts)(value, value); FCODE(sqrts)(value, value);
ICODE(mov)(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 1>())); ICODE(mov)(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 1>()));
FCODE(divs)(result, value); FCODE(divs)(result, value);
ICODE(padd)(result, code.MConst(xword, fsize == 32 ? 0x00004000 : 0x0000'0800'0000'0000)); ICODE(padd)(result, code.XmmBConst<fsize>(xword, fsize == 32 ? 0x00004000 : 0x0000'0800'0000'0000));
code.pand(result, xmm0); code.pand(result, xmm0);
code.L(end); code.L(end);
@ -1091,7 +1090,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
} }
code.L(default_nan); code.L(default_nan);
code.movd(result, code.MConst(xword, 0x7FC00000)); code.movd(result, code.XmmBConst<32>(xword, 0x7FC00000));
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
} else { } else {
Xbyak::Label nan, zero; Xbyak::Label nan, zero;
@ -1120,26 +1119,26 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
code.L(zero); code.L(zero);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpor(result, value, code.MConst(xword, 0x7FF0'0000'0000'0000)); code.vpor(result, value, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
} else { } else {
code.movaps(result, value); code.movaps(result, value);
code.por(result, code.MConst(xword, 0x7FF0'0000'0000'0000)); code.por(result, code.XmmBConst<64>(xword, 0x7FF0'0000'0000'0000));
} }
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
code.L(nan); code.L(nan);
if (!ctx.FPCR().DN()) { if (!ctx.FPCR().DN()) {
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpor(result, operand, code.MConst(xword, 0x0008'0000'0000'0000)); code.vpor(result, operand, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
} else { } else {
code.movaps(result, operand); code.movaps(result, operand);
code.por(result, code.MConst(xword, 0x0008'0000'0000'0000)); code.por(result, code.XmmBConst<64>(xword, 0x0008'0000'0000'0000));
} }
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
} }
code.L(default_nan); code.L(default_nan);
code.movq(result, code.MConst(xword, 0x7FF8'0000'0000'0000)); code.movq(result, code.XmmBConst<64>(xword, 0x7FF8'0000'0000'0000));
code.jmp(end, code.T_NEAR); code.jmp(end, code.T_NEAR);
} }
@ -1193,9 +1192,9 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.vmovaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>())); code.vmovaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 3>()));
FCODE(vfnmadd231s)(result, operand1, operand2); FCODE(vfnmadd231s)(result, operand1, operand2);
FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>())); FCODE(vmuls)(result, result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, -1, 1>()));
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
return; return;
@ -1208,7 +1207,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.vmovaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>())); code.vmovaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 3>()));
FCODE(vfnmadd231s)(result, operand1, operand2); FCODE(vfnmadd231s)(result, operand1, operand2);
// Detect if the intermediate result is infinity or NaN or nearly an infinity. // Detect if the intermediate result is infinity or NaN or nearly an infinity.
@ -1223,7 +1222,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.jae(fallback, code.T_NEAR); code.jae(fallback, code.T_NEAR);
FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>())); FCODE(vmuls)(result, result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, -1, 1>()));
code.L(end); code.L(end);
code.SwitchToFarCode(); code.SwitchToFarCode();
@ -1252,10 +1251,10 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>())); code.movaps(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, 0, 3>()));
FCODE(muls)(operand1, operand2); FCODE(muls)(operand1, operand2);
FCODE(subs)(result, operand1); FCODE(subs)(result, operand1);
FCODE(muls)(result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>())); FCODE(muls)(result, code.XmmBConst<fsize>(xword, FP::FPValue<FPT, false, -1, 1>()));
ctx.reg_alloc.DefineValue(inst, operand1); ctx.reg_alloc.DefineValue(inst, operand1);
return; return;
@ -1507,7 +1506,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if constexpr (fsize == 64) { if constexpr (fsize == 64) {
if (fbits != 0) { if (fbits != 0) {
const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52); const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
code.mulsd(src, code.MConst(xword, scale_factor)); code.mulsd(src, code.XmmBConst<64>(xword, scale_factor));
} }
if (!truncating) { if (!truncating) {
@ -1516,7 +1515,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
} else { } else {
if (fbits != 0) { if (fbits != 0) {
const u32 scale_factor = static_cast<u32>((fbits + 127) << 23); const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
code.mulss(src, code.MConst(xword, scale_factor)); code.mulss(src, code.XmmBConst<32>(xword, scale_factor));
} }
if (!truncating) { if (!truncating) {
@ -1534,7 +1533,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if (!unsigned_) { if (!unsigned_) {
ZeroIfNaN<64>(code, src, scratch); ZeroIfNaN<64>(code, src, scratch);
code.movsd(scratch, code.MConst(xword, f64_max_s64_lim)); code.movsd(scratch, code.XmmBConst<64>(xword, f64_max_s64_lim));
code.comisd(scratch, src); code.comisd(scratch, src);
code.jna(saturate_max, code.T_NEAR); code.jna(saturate_max, code.T_NEAR);
code.cvttsd2si(result, src); // 64 bit gpr code.cvttsd2si(result, src); // 64 bit gpr
@ -1553,7 +1552,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.pxor(xmm0, xmm0); code.pxor(xmm0, xmm0);
code.movaps(scratch, src); code.movaps(scratch, src);
code.subsd(scratch, code.MConst(xword, f64_max_s64_lim)); code.subsd(scratch, code.XmmBConst<64>(xword, f64_max_s64_lim));
// these both result in zero if src/scratch are NaN // these both result in zero if src/scratch are NaN
code.maxsd(src, xmm0); code.maxsd(src, xmm0);
@ -1575,21 +1574,21 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
ZeroIfNaN<64>(code, src, scratch); ZeroIfNaN<64>(code, src, scratch);
code.minsd(src, code.MConst(xword, f64_max_s32)); code.minsd(src, code.XmmBConst<64>(xword, f64_max_s32));
// maxsd not required as cvttsd2si results in 0x8000'0000 when out of range // maxsd not required as cvttsd2si results in 0x8000'0000 when out of range
code.cvttsd2si(result.cvt32(), src); // 32 bit gpr code.cvttsd2si(result.cvt32(), src); // 32 bit gpr
} else { } else {
code.pxor(xmm0, xmm0); code.pxor(xmm0, xmm0);
code.maxsd(src, xmm0); // results in a zero if src is NaN code.maxsd(src, xmm0); // results in a zero if src is NaN
code.minsd(src, code.MConst(xword, f64_max_u32)); code.minsd(src, code.XmmBConst<64>(xword, f64_max_u32));
code.cvttsd2si(result, src); // 64 bit gpr code.cvttsd2si(result, src); // 64 bit gpr
} }
} else { } else {
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
ZeroIfNaN<64>(code, src, scratch); ZeroIfNaN<64>(code, src, scratch);
code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u16 : f64_min_s16)); code.maxsd(src, code.XmmBConst<64>(xword, unsigned_ ? f64_min_u16 : f64_min_s16));
code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u16 : f64_max_s16)); code.minsd(src, code.XmmBConst<64>(xword, unsigned_ ? f64_max_u16 : f64_max_s16));
code.cvttsd2si(result, src); // 64 bit gpr code.cvttsd2si(result, src); // 64 bit gpr
} }
@ -1714,7 +1713,7 @@ void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23); const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
code.mulss(result, code.MConst(xword, scale_factor)); code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1734,7 +1733,7 @@ void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23); const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
code.mulss(result, code.MConst(xword, scale_factor)); code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1759,7 +1758,7 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23); const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
code.mulss(result, code.MConst(xword, scale_factor)); code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1795,7 +1794,7 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23); const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
code.mulss(result, code.MConst(xword, scale_factor)); code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1815,7 +1814,7 @@ void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
code.mulsd(result, code.MConst(xword, scale_factor)); code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1835,7 +1834,7 @@ void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
code.mulsd(result, code.MConst(xword, scale_factor)); code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1853,7 +1852,7 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
code.mulsd(result, code.MConst(xword, scale_factor)); code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1878,7 +1877,7 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
code.mulsd(to, code.MConst(xword, scale_factor)); code.mulsd(to, code.XmmBConst<64>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, to); ctx.reg_alloc.DefineValue(inst, to);
@ -1897,7 +1896,7 @@ void EmitX64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
code.mulsd(result, code.MConst(xword, scale_factor)); code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1916,7 +1915,7 @@ void EmitX64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23); const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
code.mulss(result, code.MConst(xword, scale_factor)); code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1937,18 +1936,18 @@ void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.movq(tmp, from); code.movq(tmp, from);
code.punpckldq(tmp, code.MConst(xword, 0x4530000043300000, 0)); code.punpckldq(tmp, code.XmmConst(xword, 0x4530000043300000, 0));
code.subpd(tmp, code.MConst(xword, 0x4330000000000000, 0x4530000000000000)); code.subpd(tmp, code.XmmConst(xword, 0x4330000000000000, 0x4530000000000000));
code.pshufd(result, tmp, 0b01001110); code.pshufd(result, tmp, 0b01001110);
code.addpd(result, tmp); code.addpd(result, tmp);
if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) { if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) {
code.pand(result, code.MConst(xword, f64_non_sign_mask)); code.pand(result, code.XmmBConst<64>(xword, f64_non_sign_mask));
} }
} }
if (fbits != 0) { if (fbits != 0) {
const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52); const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
code.mulsd(result, code.MConst(xword, scale_factor)); code.mulsd(result, code.XmmBConst<64>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1992,7 +1991,7 @@ void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) {
if (fbits != 0) { if (fbits != 0) {
const u32 scale_factor = static_cast<u32>((127 - fbits) << 23); const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
code.mulss(result, code.MConst(xword, scale_factor)); code.mulss(result, code.XmmBConst<32>(xword, scale_factor));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);

View file

@ -94,8 +94,8 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
// !(b <= a+b) == b > a+b // !(b <= a+b) == b > a+b
code.movdqa(tmp_a, xmm_a); code.movdqa(tmp_a, xmm_a);
code.movdqa(tmp_b, xmm_b); code.movdqa(tmp_b, xmm_b);
code.paddw(tmp_a, code.MConst(xword, 0x80008000)); code.paddw(tmp_a, code.XmmBConst<16>(xword, 0x8000));
code.paddw(tmp_b, code.MConst(xword, 0x80008000)); code.paddw(tmp_b, code.XmmBConst<16>(xword, 0x8000));
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison! code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
ctx.reg_alloc.DefineValue(ge_inst, tmp_b); ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
@ -217,8 +217,8 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
// (a >= b) == !(b > a) // (a >= b) == !(b > a)
code.pcmpeqb(ones, ones); code.pcmpeqb(ones, ones);
code.paddw(xmm_a, code.MConst(xword, 0x80008000)); code.paddw(xmm_a, code.XmmBConst<16>(xword, 0x8000));
code.paddw(xmm_b, code.MConst(xword, 0x80008000)); code.paddw(xmm_b, code.XmmBConst<16>(xword, 0x8000));
code.movdqa(xmm_ge, xmm_b); code.movdqa(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
code.pxor(xmm_ge, ones); code.pxor(xmm_ge, ones);
@ -654,7 +654,7 @@ void EmitX64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
// TODO: Optimize with zero-extension detection // TODO: Optimize with zero-extension detection
code.movaps(tmp, code.MConst(xword, 0xFFFFFFFF)); code.movaps(tmp, code.XmmBConst<8>(xword, 0xFF));
code.pand(xmm_a, tmp); code.pand(xmm_a, tmp);
code.pand(xmm_b, tmp); code.pand(xmm_b, tmp);
code.psadbw(xmm_a, xmm_b); code.psadbw(xmm_a, xmm_b);

View file

@ -458,7 +458,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const
const u64 shift_matrix = shift_amount < 8 const u64 shift_matrix = shift_amount < 8
? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8)) ? (0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8))
: 0x8080808080808080; : 0x8080808080808080;
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); code.gf2p8affineqb(result, code.XmmBConst<64>(xword, shift_matrix), 0);
return; return;
} }
@ -519,7 +519,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst)
code.pxor(tmp2, tmp2); code.pxor(tmp2, tmp2);
code.psrlq(result, shift_amount); code.psrlq(result, shift_amount);
code.movdqa(tmp1, code.MConst(xword, sign_bit, sign_bit)); code.movdqa(tmp1, code.XmmBConst<64>(xword, sign_bit));
code.pand(tmp1, result); code.pand(tmp1, result);
code.psubq(tmp2, tmp1); code.psubq(tmp2, tmp1);
code.por(result, tmp2); code.por(result, tmp2);
@ -571,7 +571,7 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17; const Xbyak::Xmm tmp = xmm17;
code.vmovdqa32(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vmovdqa32(tmp, code.XmmBConst<16>(xword, 0x00FF));
code.vpxord(right_shift, right_shift, right_shift); code.vpxord(right_shift, right_shift, right_shift);
code.vpsubw(right_shift, right_shift, left_shift); code.vpsubw(right_shift, right_shift, left_shift);
@ -606,7 +606,7 @@ void EmitX64::EmitVectorArithmeticVShift32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa(tmp, code.MConst(xword, 0x000000FF000000FF, 0x000000FF000000FF)); code.vmovdqa(tmp, code.XmmBConst<32>(xword, 0x000000FF));
code.vpxor(right_shift, right_shift, right_shift); code.vpxor(right_shift, right_shift, right_shift);
code.vpsubd(right_shift, right_shift, left_shift); code.vpsubd(right_shift, right_shift, left_shift);
@ -637,7 +637,7 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17; const Xbyak::Xmm tmp = xmm17;
code.vmovdqa32(tmp, code.MConst(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vmovdqa32(tmp, code.XmmBConst<64>(xword, 0x00000000000000FF));
code.vpxorq(right_shift, right_shift, right_shift); code.vpxorq(right_shift, right_shift, right_shift);
code.vpsubq(right_shift, right_shift, left_shift); code.vpsubq(right_shift, right_shift, left_shift);
@ -925,15 +925,15 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp1, code.MConst(xword, 0x0101010102020304, 0x0000000000000000)); code.movdqa(tmp1, code.XmmConst(xword, 0x0101010102020304, 0x0000000000000000));
code.movdqa(tmp2, tmp1); code.movdqa(tmp2, tmp1);
code.pshufb(tmp2, data); code.pshufb(tmp2, data);
code.psrlw(data, 4); code.psrlw(data, 4);
code.pand(data, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); code.pand(data, code.XmmBConst<8>(xword, 0x0F));
code.pshufb(tmp1, data); code.pshufb(tmp1, data);
code.movdqa(data, code.MConst(xword, 0x0404040404040404, 0x0404040404040404)); code.movdqa(data, code.XmmBConst<8>(xword, 0x04));
code.pcmpeqb(data, tmp1); code.pcmpeqb(data, tmp1);
code.pand(data, tmp2); code.pand(data, tmp2);
@ -966,11 +966,11 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
code.vpcmpeqw(zeros, zeros, zeros); code.vpcmpeqw(zeros, zeros, zeros);
code.vpcmpeqw(tmp, tmp, tmp); code.vpcmpeqw(tmp, tmp, tmp);
code.vpcmpeqw(zeros, zeros, data); code.vpcmpeqw(zeros, zeros, data);
code.vpmullw(data, data, code.MConst(xword, 0xf0d3f0d3f0d3f0d3, 0xf0d3f0d3f0d3f0d3)); code.vpmullw(data, data, code.XmmBConst<16>(xword, 0xf0d3));
code.vpsllw(tmp, tmp, 15); code.vpsllw(tmp, tmp, 15);
code.vpsllw(zeros, zeros, 7); code.vpsllw(zeros, zeros, 7);
code.vpsrlw(data, data, 12); code.vpsrlw(data, data, 12);
code.vmovdqa(result, code.MConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01)); code.vmovdqa(result, code.XmmConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01));
code.vpor(tmp, tmp, zeros); code.vpor(tmp, tmp, zeros);
code.vpor(data, data, tmp); code.vpor(data, data, tmp);
code.vpshufb(result, result, data); code.vpshufb(result, result, data);
@ -1002,11 +1002,11 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
code.pcmpeqw(zeros, zeros); code.pcmpeqw(zeros, zeros);
code.pcmpeqw(tmp, tmp); code.pcmpeqw(tmp, tmp);
code.pcmpeqw(zeros, data); code.pcmpeqw(zeros, data);
code.pmullw(data, code.MConst(xword, 0xf0d3f0d3f0d3f0d3, 0xf0d3f0d3f0d3f0d3)); code.pmullw(data, code.XmmBConst<16>(xword, 0xf0d3));
code.psllw(tmp, 15); code.psllw(tmp, 15);
code.psllw(zeros, 7); code.psllw(zeros, 7);
code.psrlw(data, 12); code.psrlw(data, 12);
code.movdqa(result, code.MConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01)); code.movdqa(result, code.XmmConst(xword, 0x0903060a040b0c10, 0x0f080e0207050d01));
code.por(tmp, zeros); code.por(tmp, zeros);
code.por(data, tmp); code.por(data, tmp);
code.pshufb(result, data); code.pshufb(result, data);
@ -1038,7 +1038,7 @@ void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.movdqa(tmp, code.XmmBConst<16>(xword, 0x00FF));
code.pand(lhs, tmp); code.pand(lhs, tmp);
code.pand(rhs, tmp); code.pand(rhs, tmp);
code.packuswb(lhs, rhs); code.packuswb(lhs, rhs);
@ -1088,7 +1088,7 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst)
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.movdqa(tmp, code.XmmBConst<16>(xword, 0x00FF));
code.pand(lhs, tmp); code.pand(lhs, tmp);
code.pand(rhs, tmp); code.pand(rhs, tmp);
code.packuswb(lhs, rhs); code.packuswb(lhs, rhs);
@ -1423,13 +1423,13 @@ static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Ins
case 8: case 8:
code.pavgb(tmp, a); code.pavgb(tmp, a);
code.pxor(a, b); code.pxor(a, b);
code.pand(a, code.MConst(xword, 0x0101010101010101, 0x0101010101010101)); code.pand(a, code.XmmBConst<8>(xword, 0x01));
code.psubb(tmp, a); code.psubb(tmp, a);
break; break;
case 16: case 16:
code.pavgw(tmp, a); code.pavgw(tmp, a);
code.pxor(a, b); code.pxor(a, b);
code.pand(a, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); code.pand(a, code.XmmBConst<16>(xword, 0x0001));
code.psubw(tmp, a); code.psubw(tmp, a);
break; break;
case 32: case 32:
@ -1464,7 +1464,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst*
switch (esize) { switch (esize) {
case 8: { case 8: {
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp, code.MConst(xword, 0x8080808080808080, 0x8080808080808080)); code.movdqa(tmp, code.XmmBConst<8>(xword, 0x80));
code.pxor(a, tmp); code.pxor(a, tmp);
code.pxor(b, tmp); code.pxor(b, tmp);
code.pavgb(b, a); code.pavgb(b, a);
@ -1473,7 +1473,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst*
} }
case 16: { case 16: {
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); code.movdqa(tmp, code.XmmBConst<16>(xword, 0x8000));
code.pxor(a, tmp); code.pxor(a, tmp);
code.pxor(b, tmp); code.pxor(b, tmp);
code.pavgw(b, a); code.pavgw(b, a);
@ -1635,13 +1635,13 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
code.paddb(result, result); code.paddb(result, result);
} else if (code.HasHostFeature(HostFeature::GFNI)) { } else if (code.HasHostFeature(HostFeature::GFNI)) {
const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8); const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8);
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); code.gf2p8affineqb(result, code.XmmBConst<64>(xword, shift_matrix), 0);
} else { } else {
const u64 replicand = (0xFFULL << shift_amount) & 0xFF; const u64 replicand = (0xFFULL << shift_amount) & 0xFF;
const u64 mask = mcl::bit::replicate_element<u8, u64>(replicand); const u64 mask = mcl::bit::replicate_element<u8, u64>(replicand);
code.psllw(result, shift_amount); code.psllw(result, shift_amount);
code.pand(result, code.MConst(xword, mask, mask)); code.pand(result, code.XmmBConst<64>(xword, mask));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1692,13 +1692,13 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) {
code.pxor(result, result); code.pxor(result, result);
} else if (code.HasHostFeature(HostFeature::GFNI)) { } else if (code.HasHostFeature(HostFeature::GFNI)) {
const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8); const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8);
code.gf2p8affineqb(result, code.MConst(xword, shift_matrix, shift_matrix), 0); code.gf2p8affineqb(result, code.XmmBConst<64>(xword, shift_matrix), 0);
} else { } else {
const u64 replicand = 0xFEULL >> shift_amount; const u64 replicand = 0xFEULL >> shift_amount;
const u64 mask = mcl::bit::replicate_element<u8, u64>(replicand); const u64 mask = mcl::bit::replicate_element<u8, u64>(replicand);
code.psrlw(result, shift_amount); code.psrlw(result, shift_amount);
code.pand(result, code.MConst(xword, mask, mask)); code.pand(result, code.XmmConst(xword, mask, mask));
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -1752,7 +1752,7 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17; const Xbyak::Xmm tmp = xmm17;
code.vmovdqa32(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vmovdqa32(tmp, code.XmmBConst<16>(xword, 0x00FF));
code.vpxord(right_shift, right_shift, right_shift); code.vpxord(right_shift, right_shift, right_shift);
code.vpsubw(right_shift, right_shift, left_shift); code.vpsubw(right_shift, right_shift, left_shift);
code.vpandd(left_shift, left_shift, tmp); code.vpandd(left_shift, left_shift, tmp);
@ -1780,7 +1780,7 @@ void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa(tmp, code.MConst(xword, 0x000000FF000000FF, 0x000000FF000000FF)); code.vmovdqa(tmp, code.XmmBConst<32>(xword, 0x000000FF));
code.vpxor(right_shift, right_shift, right_shift); code.vpxor(right_shift, right_shift, right_shift);
code.vpsubd(right_shift, right_shift, left_shift); code.vpsubd(right_shift, right_shift, left_shift);
code.vpand(left_shift, left_shift, tmp); code.vpand(left_shift, left_shift, tmp);
@ -1808,7 +1808,7 @@ void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa(tmp, code.MConst(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vmovdqa(tmp, code.XmmBConst<64>(xword, 0x00000000000000FF));
code.vpxor(right_shift, right_shift, right_shift); code.vpxor(right_shift, right_shift, right_shift);
code.vpsubq(right_shift, right_shift, left_shift); code.vpsubq(right_shift, right_shift, left_shift);
code.vpand(left_shift, left_shift, tmp); code.vpand(left_shift, left_shift, tmp);
@ -1928,7 +1928,7 @@ void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(tmp, code.XmmBConst<32>(xword, 0x80000000));
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp_b, b); code.movdqa(tmp_b, b);
@ -1957,7 +1957,7 @@ void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa(xmm0, code.MConst(xword, 0x8000000000000000, 0x8000000000000000)); code.vmovdqa(xmm0, code.XmmBConst<64>(xword, 0x8000000000000000));
code.vpsubq(tmp, y, xmm0); code.vpsubq(tmp, y, xmm0);
code.vpsubq(xmm0, x, xmm0); code.vpsubq(xmm0, x, xmm0);
code.vpcmpgtq(xmm0, tmp, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0);
@ -2076,7 +2076,7 @@ void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm sint_max_plus_one = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm sint_max_plus_one = ctx.reg_alloc.ScratchXmm();
code.movdqa(sint_max_plus_one, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(sint_max_plus_one, code.XmmBConst<32>(xword, 0x80000000));
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp_a, a); code.movdqa(tmp_a, a);
@ -2107,7 +2107,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa(xmm0, code.MConst(xword, 0x8000000000000000, 0x8000000000000000)); code.vmovdqa(xmm0, code.XmmBConst<64>(xword, 0x8000000000000000));
code.vpsubq(tmp, y, xmm0); code.vpsubq(tmp, y, xmm0);
code.vpsubq(xmm0, x, xmm0); code.vpsubq(xmm0, x, xmm0);
code.vpcmpgtq(xmm0, tmp, xmm0); code.vpcmpgtq(xmm0, tmp, xmm0);
@ -2136,7 +2136,7 @@ void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) {
code.psrlw(tmp_a, 8); code.psrlw(tmp_a, 8);
code.psrlw(tmp_b, 8); code.psrlw(tmp_b, 8);
code.pmullw(tmp_a, tmp_b); code.pmullw(tmp_a, tmp_b);
code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(a, code.XmmBConst<16>(xword, 0x00FF));
code.psllw(tmp_a, 8); code.psllw(tmp_a, 8);
code.por(a, tmp_a); code.por(a, tmp_a);
@ -2238,7 +2238,7 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
code.pxor(zeros, zeros); code.pxor(zeros, zeros);
code.pand(a, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(a, code.XmmBConst<16>(xword, 0x00FF));
code.packuswb(a, zeros); code.packuswb(a, zeros);
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
@ -2522,7 +2522,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
code.movdqa(c, a); code.movdqa(c, a);
code.psllq(a, 32); code.psllq(a, 32);
code.movdqa(tmp1, code.MConst(xword, 0x80000000'00000000, 0x80000000'00000000)); code.movdqa(tmp1, code.XmmBConst<64>(xword, 0x80000000'00000000));
code.movdqa(tmp2, tmp1); code.movdqa(tmp2, tmp1);
code.pand(tmp1, a); code.pand(tmp1, a);
code.pand(tmp2, c); code.pand(tmp2, c);
@ -2674,7 +2674,7 @@ void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, x); ctx.reg_alloc.DefineValue(inst, x);
} else { } else {
const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp3, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(tmp3, code.XmmBConst<32>(xword, 0x80000000));
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp2, x); code.movdqa(tmp2, x);
@ -2759,7 +2759,7 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, x); ctx.reg_alloc.DefineValue(inst, x);
} else { } else {
const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp3, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(tmp3, code.XmmBConst<32>(xword, 0x80000000));
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
code.movdqa(tmp2, tmp1); code.movdqa(tmp2, tmp1);
@ -2803,7 +2803,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
Xbyak::Label loop; Xbyak::Label loop;
code.pxor(result, result); code.pxor(result, result);
code.movdqa(mask, code.MConst(xword, 0x0101010101010101, 0x0101010101010101)); code.movdqa(mask, code.XmmBConst<8>(xword, 0x01));
code.mov(counter, 8); code.mov(counter, 8);
code.L(loop); code.L(loop);
@ -2847,7 +2847,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst
code.pmovzxbw(xmm_a, xmm_a); code.pmovzxbw(xmm_a, xmm_a);
code.pmovzxbw(xmm_b, xmm_b); code.pmovzxbw(xmm_b, xmm_b);
code.pxor(result, result); code.pxor(result, result);
code.movdqa(mask, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); code.movdqa(mask, code.XmmBConst<16>(xword, 0x0001));
code.mov(counter, 8); code.mov(counter, 8);
code.L(loop); code.L(loop);
@ -2930,11 +2930,11 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
code.movdqa(high_a, low_a); code.movdqa(high_a, low_a);
code.psrlw(high_a, 4); code.psrlw(high_a, 4);
code.movdqa(tmp1, code.MConst(xword, 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F)); code.movdqa(tmp1, code.XmmBConst<8>(xword, 0x0F));
code.pand(high_a, tmp1); // High nibbles code.pand(high_a, tmp1); // High nibbles
code.pand(low_a, tmp1); // Low nibbles code.pand(low_a, tmp1); // Low nibbles
code.movdqa(tmp1, code.MConst(xword, 0x0302020102010100, 0x0403030203020201)); code.movdqa(tmp1, code.XmmConst(xword, 0x0302020102010100, 0x0403030203020201));
code.movdqa(tmp2, tmp1); code.movdqa(tmp2, tmp1);
code.pshufb(tmp1, low_a); code.pshufb(tmp1, low_a);
code.pshufb(tmp2, high_a); code.pshufb(tmp2, high_a);
@ -2958,10 +2958,10 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
if (code.HasHostFeature(HostFeature::GFNI)) { if (code.HasHostFeature(HostFeature::GFNI)) {
code.gf2p8affineqb(data, code.MConst(xword, 0x8040201008040201, 0x8040201008040201), 0); code.gf2p8affineqb(data, code.XmmBConst<64>(xword, 0x8040201008040201), 0);
} else { } else {
const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.movdqa(high_nibble_reg, code.XmmBConst<8>(xword, 0xF0));
code.pand(high_nibble_reg, data); code.pand(high_nibble_reg, data);
code.pxor(data, high_nibble_reg); code.pxor(data, high_nibble_reg);
code.psrld(high_nibble_reg, 4); code.psrld(high_nibble_reg, 4);
@ -2969,25 +2969,25 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSSE3)) { if (code.HasHostFeature(HostFeature::SSSE3)) {
// High lookup // High lookup
const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm();
code.movdqa(high_reversed_reg, code.MConst(xword, 0xE060A020C0408000, 0xF070B030D0509010)); code.movdqa(high_reversed_reg, code.XmmConst(xword, 0xE060A020C0408000, 0xF070B030D0509010));
code.pshufb(high_reversed_reg, data); code.pshufb(high_reversed_reg, data);
// Low lookup (low nibble equivalent of the above) // Low lookup (low nibble equivalent of the above)
code.movdqa(data, code.MConst(xword, 0x0E060A020C040800, 0x0F070B030D050901)); code.movdqa(data, code.XmmConst(xword, 0x0E060A020C040800, 0x0F070B030D050901));
code.pshufb(data, high_nibble_reg); code.pshufb(data, high_nibble_reg);
code.por(data, high_reversed_reg); code.por(data, high_reversed_reg);
} else { } else {
code.pslld(data, 4); code.pslld(data, 4);
code.por(data, high_nibble_reg); code.por(data, high_nibble_reg);
code.movdqa(high_nibble_reg, code.MConst(xword, 0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC)); code.movdqa(high_nibble_reg, code.XmmBConst<8>(xword, 0xCC));
code.pand(high_nibble_reg, data); code.pand(high_nibble_reg, data);
code.pxor(data, high_nibble_reg); code.pxor(data, high_nibble_reg);
code.psrld(high_nibble_reg, 2); code.psrld(high_nibble_reg, 2);
code.pslld(data, 2); code.pslld(data, 2);
code.por(data, high_nibble_reg); code.por(data, high_nibble_reg);
code.movdqa(high_nibble_reg, code.MConst(xword, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA)); code.movdqa(high_nibble_reg, code.XmmBConst<8>(xword, 0xAA));
code.pand(high_nibble_reg, data); code.pand(high_nibble_reg, data);
code.pxor(data, high_nibble_reg); code.pxor(data, high_nibble_reg);
code.psrld(high_nibble_reg, 1); code.psrld(high_nibble_reg, 1);
@ -3037,7 +3037,7 @@ void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) {
code.paddw(data, temp); code.paddw(data, temp);
// Add pairs of 16-bit values into 32-bit lanes // Add pairs of 16-bit values into 32-bit lanes
code.movdqa(temp, code.MConst(xword, 0x0001000100010001, 0x0001000100010001)); code.movdqa(temp, code.XmmBConst<16>(xword, 0x0001));
code.pmaddwd(data, temp); code.pmaddwd(data, temp);
// Sum adjacent 32-bit lanes // Sum adjacent 32-bit lanes
@ -3100,7 +3100,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I
switch (esize) { switch (esize) {
case 8: { case 8: {
const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm();
code.movdqa(vec_128, code.MConst(xword, 0x8080808080808080, 0x8080808080808080)); code.movdqa(vec_128, code.XmmBConst<8>(xword, 0x80));
code.paddb(a, vec_128); code.paddb(a, vec_128);
code.paddb(b, vec_128); code.paddb(b, vec_128);
@ -3110,7 +3110,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I
} }
case 16: { case 16: {
const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm();
code.movdqa(vec_32768, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); code.movdqa(vec_32768, code.XmmBConst<16>(xword, 0x8000));
code.paddw(a, vec_32768); code.paddw(a, vec_32768);
code.paddw(b, vec_32768); code.paddw(b, vec_32768);
@ -3506,7 +3506,7 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) {
code.pand(tmp, y); code.pand(tmp, y);
code.pand(sign_correction, x); code.pand(sign_correction, x);
code.paddd(sign_correction, tmp); code.paddd(sign_correction, tmp);
code.pand(sign_correction, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)); code.pand(sign_correction, code.XmmBConst<32>(xword, 0x7FFFFFFF));
// calculate unsigned multiply // calculate unsigned multiply
code.movdqa(tmp, x); code.movdqa(tmp, x);
@ -3547,13 +3547,13 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
const Xbyak::Address mask = [esize, &code] { const Xbyak::Address mask = [esize, &code] {
switch (esize) { switch (esize) {
case 8: case 8:
return code.MConst(xword, 0x8080808080808080, 0x8080808080808080); return code.XmmBConst<8>(xword, 0x80);
case 16: case 16:
return code.MConst(xword, 0x8000800080008000, 0x8000800080008000); return code.XmmBConst<16>(xword, 0x8000);
case 32: case 32:
return code.MConst(xword, 0x8000000080000000, 0x8000000080000000); return code.XmmBConst<32>(xword, 0x80000000);
case 64: case 64:
return code.MConst(xword, 0x8000000000000000, 0x8000000000000000); return code.XmmBConst<64>(xword, 0x8000000000000000);
default: default:
UNREACHABLE(); UNREACHABLE();
} }
@ -3717,7 +3717,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC
code.vpblendvb(xmm0, tmp, tmp2, xmm0); code.vpblendvb(xmm0, tmp, tmp2, xmm0);
ctx.reg_alloc.Release(tmp2); ctx.reg_alloc.Release(tmp2);
} else { } else {
code.pand(xmm0, code.MConst(xword, 0x8080808080808080, 0x8080808080808080)); code.pand(xmm0, code.XmmBConst<8>(xword, 0x80));
code.movdqa(tmp, xmm0); code.movdqa(tmp, xmm0);
code.psrlw(tmp, 7); code.psrlw(tmp, 7);
code.pxor(xmm0, xmm0); code.pxor(xmm0, xmm0);
@ -3836,14 +3836,14 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply16(EmitContext& ctx, IR::
code.vpsrlw(lower_tmp, lower_tmp, 15); code.vpsrlw(lower_tmp, lower_tmp, 15);
code.vpaddw(upper_tmp, upper_tmp, upper_tmp); code.vpaddw(upper_tmp, upper_tmp, upper_tmp);
code.vpor(upper_result, upper_tmp, lower_tmp); code.vpor(upper_result, upper_tmp, lower_tmp);
code.vpcmpeqw(upper_tmp, upper_result, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); code.vpcmpeqw(upper_tmp, upper_result, code.XmmBConst<16>(xword, 0x8000));
code.vpxor(upper_result, upper_result, upper_tmp); code.vpxor(upper_result, upper_result, upper_tmp);
} else { } else {
code.paddw(upper_tmp, upper_tmp); code.paddw(upper_tmp, upper_tmp);
code.psrlw(lower_tmp, 15); code.psrlw(lower_tmp, 15);
code.movdqa(upper_result, upper_tmp); code.movdqa(upper_result, upper_tmp);
code.por(upper_result, lower_tmp); code.por(upper_result, lower_tmp);
code.movdqa(upper_tmp, code.MConst(xword, 0x8000800080008000, 0x8000800080008000)); code.movdqa(upper_tmp, code.XmmBConst<16>(xword, 0x8000));
code.pcmpeqw(upper_tmp, upper_result); code.pcmpeqw(upper_tmp, upper_result);
code.pxor(upper_result, upper_tmp); code.pxor(upper_result, upper_tmp);
} }
@ -3889,7 +3889,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.vpcmpeqd(mask, upper_result, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.vpcmpeqd(mask, upper_result, code.XmmBConst<32>(xword, 0x80000000));
code.vpxor(upper_result, upper_result, mask); code.vpxor(upper_result, upper_result, mask);
code.pmovmskb(bit, mask); code.pmovmskb(bit, mask);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
@ -3958,7 +3958,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
if (upper_inst) { if (upper_inst) {
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.movdqa(tmp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(tmp, code.XmmBConst<32>(xword, 0x80000000));
code.pcmpeqd(tmp, upper_result); code.pcmpeqd(tmp, upper_result);
code.pxor(upper_result, tmp); code.pxor(upper_result, tmp);
code.pmovmskb(bit, tmp); code.pmovmskb(bit, tmp);
@ -3984,10 +3984,10 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx,
code.pmaddwd(x, y); code.pmaddwd(x, y);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpcmpeqd(y, x, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.vpcmpeqd(y, x, code.XmmBConst<32>(xword, 0x80000000));
code.vpxor(x, x, y); code.vpxor(x, x, y);
} else { } else {
code.movdqa(y, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(y, code.XmmBConst<32>(xword, 0x80000000));
code.pcmpeqd(y, x); code.pcmpeqd(y, x);
code.pxor(x, y); code.pxor(x, y);
} }
@ -4037,11 +4037,11 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx,
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpcmpeqq(y, x, code.MConst(xword, 0x8000000000000000, 0x8000000000000000)); code.vpcmpeqq(y, x, code.XmmBConst<64>(xword, 0x8000000000000000));
code.vpxor(x, x, y); code.vpxor(x, x, y);
code.vpmovmskb(bit, y); code.vpmovmskb(bit, y);
} else { } else {
code.movdqa(y, code.MConst(xword, 0x8000000000000000, 0x8000000000000000)); code.movdqa(y, code.XmmBConst<64>(xword, 0x8000000000000000));
code.pcmpeqd(y, x); code.pcmpeqd(y, x);
code.shufps(y, y, 0b11110101); code.shufps(y, y, 0b11110101);
code.pxor(x, y); code.pxor(x, y);
@ -4187,13 +4187,13 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
const Xbyak::Address mask = [esize, &code] { const Xbyak::Address mask = [esize, &code] {
switch (esize) { switch (esize) {
case 8: case 8:
return code.MConst(xword, 0x8080808080808080, 0x8080808080808080); return code.XmmBConst<8>(xword, 0x80);
case 16: case 16:
return code.MConst(xword, 0x8000800080008000, 0x8000800080008000); return code.XmmBConst<16>(xword, 0x8000);
case 32: case 32:
return code.MConst(xword, 0x8000000080000000, 0x8000000080000000); return code.XmmBConst<32>(xword, 0x80000000);
case 64: case 64:
return code.MConst(xword, 0x8000000000000000, 0x8000000000000000); return code.XmmBConst<64>(xword, 0x8000000000000000);
default: default:
UNREACHABLE(); UNREACHABLE();
} }
@ -4448,7 +4448,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.Release(xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper);
} }
code.paddusb(indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); code.paddusb(indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
code.pshufb(xmm_table0, indicies); code.pshufb(xmm_table0, indicies);
ctx.reg_alloc.DefineValue(inst, xmm_table0); ctx.reg_alloc.DefineValue(inst, xmm_table0);
@ -4467,10 +4467,10 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
} }
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpaddusb(xmm0, indicies, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF)); code.vpaddusb(xmm0, indicies, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
} else { } else {
code.movaps(xmm0, indicies); code.movaps(xmm0, indicies);
code.paddusb(xmm0, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF)); code.paddusb(xmm0, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
} }
code.pshufb(xmm_table0, indicies); code.pshufb(xmm_table0, indicies);
code.pblendvb(xmm_table0, defaults); code.pblendvb(xmm_table0, defaults);
@ -4496,12 +4496,12 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
} }
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); code.vpaddusb(xmm0, indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
} else { } else {
code.movaps(xmm0, indicies); code.movaps(xmm0, indicies);
code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); code.paddusb(xmm0, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
} }
code.paddusb(indicies, code.MConst(xword, 0x6060606060606060, 0xFFFFFFFFFFFFFFFF)); code.paddusb(indicies, code.XmmConst(xword, 0x6060606060606060, 0xFFFFFFFFFFFFFFFF));
code.pshufb(xmm_table0, xmm0); code.pshufb(xmm_table0, xmm0);
code.pshufb(xmm_table1, indicies); code.pshufb(xmm_table1, indicies);
code.pblendvb(xmm_table0, xmm_table1); code.pblendvb(xmm_table0, xmm_table1);
@ -4528,19 +4528,19 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) {
} }
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); code.vpaddusb(xmm0, indicies, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
} else { } else {
code.movaps(xmm0, indicies); code.movaps(xmm0, indicies);
code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); code.paddusb(xmm0, code.XmmConst(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF));
} }
code.pshufb(xmm_table0, indicies); code.pshufb(xmm_table0, indicies);
code.pshufb(xmm_table1, indicies); code.pshufb(xmm_table1, indicies);
code.pblendvb(xmm_table0, xmm_table1); code.pblendvb(xmm_table0, xmm_table1);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpaddusb(xmm0, indicies, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF)); code.vpaddusb(xmm0, indicies, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
} else { } else {
code.movaps(xmm0, indicies); code.movaps(xmm0, indicies);
code.paddusb(xmm0, code.MConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF)); code.paddusb(xmm0, code.XmmConst(xword, sat_const[table_size], 0xFFFFFFFFFFFFFFFF));
} }
code.pblendvb(xmm_table0, defaults); code.pblendvb(xmm_table0, defaults);
@ -4605,7 +4605,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]); const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
code.vptestnmb(write_mask, indicies, code.MConst(xword, 0xE0E0E0E0E0E0E0E0, 0xE0E0E0E0E0E0E0E0)); code.vptestnmb(write_mask, indicies, code.XmmBConst<8>(xword, 0xE0));
code.vpermi2b(indicies | write_mask, xmm_table0, xmm_table1); code.vpermi2b(indicies | write_mask, xmm_table0, xmm_table1);
ctx.reg_alloc.Release(xmm_table0); ctx.reg_alloc.Release(xmm_table0);
@ -4619,7 +4619,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
// Handle vector-table 2,3 // Handle vector-table 2,3
// vpcmpuble // vpcmpuble
code.vpcmpub(upper_mask, indicies, code.MConst(xword, 0x3F3F3F3F3F3F3F3F, 0x3F3F3F3F3F3F3F3F), CmpInt::LessEqual); code.vpcmpub(upper_mask, indicies, code.XmmBConst<8>(xword, 0x3F), CmpInt::LessEqual);
code.kandnw(write_mask, write_mask, upper_mask); code.kandnw(write_mask, write_mask, upper_mask);
const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseScratchXmm(table[2]); const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseScratchXmm(table[2]);
@ -4639,7 +4639,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]); const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
const Xbyak::Opmask write_mask = k1; const Xbyak::Opmask write_mask = k1;
code.vptestnmb(write_mask, indicies, code.MConst(xword, 0xE0E0E0E0E0E0E0E0, 0xE0E0E0E0E0E0E0E0)); code.vptestnmb(write_mask, indicies, code.XmmBConst<8>(xword, 0xE0));
code.vpermi2b(indicies, xmm_table0, xmm_table1); code.vpermi2b(indicies, xmm_table0, xmm_table1);
if (is_defaults_zero) { if (is_defaults_zero) {
@ -4656,7 +4656,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]);
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
code.paddusb(indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070)); code.paddusb(indicies, code.XmmBConst<8>(xword, 0x70));
code.pshufb(xmm_table0, indicies); code.pshufb(xmm_table0, indicies);
ctx.reg_alloc.DefineValue(inst, xmm_table0); ctx.reg_alloc.DefineValue(inst, xmm_table0);
@ -4669,10 +4669,10 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070)); code.vpaddusb(xmm0, indicies, code.XmmBConst<8>(xword, 0x70));
} else { } else {
code.movaps(xmm0, indicies); code.movaps(xmm0, indicies);
code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0x7070707070707070)); code.paddusb(xmm0, code.XmmBConst<8>(xword, 0x70));
} }
code.pshufb(xmm_table0, indicies); code.pshufb(xmm_table0, indicies);
code.pblendvb(xmm_table0, defaults); code.pblendvb(xmm_table0, defaults);
@ -4687,12 +4687,12 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]); const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]);
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
code.vpaddusb(xmm0, indicies, code.MConst(xword, 0x7070707070707070, 0x7070707070707070)); code.vpaddusb(xmm0, indicies, code.XmmBConst<8>(xword, 0x70));
} else { } else {
code.movaps(xmm0, indicies); code.movaps(xmm0, indicies);
code.paddusb(xmm0, code.MConst(xword, 0x7070707070707070, 0x7070707070707070)); code.paddusb(xmm0, code.XmmBConst<8>(xword, 0x70));
} }
code.paddusb(indicies, code.MConst(xword, 0x6060606060606060, 0x6060606060606060)); code.paddusb(indicies, code.XmmBConst<8>(xword, 0x60));
code.pshufb(xmm_table0, xmm0); code.pshufb(xmm_table0, xmm0);
code.pshufb(xmm_table1, indicies); code.pshufb(xmm_table1, indicies);
code.pblendvb(xmm_table0, xmm_table1); code.pblendvb(xmm_table0, xmm_table1);
@ -4706,14 +4706,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm masked = xmm16; const Xbyak::Xmm masked = xmm16;
code.vpandd(masked, indicies, code.MConst(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.vpandd(masked, indicies, code.XmmBConst<8>(xword_b, 0xF0));
for (size_t i = 0; i < table_size; ++i) { for (size_t i = 0; i < table_size; ++i) {
const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]); const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]);
const Xbyak::Opmask table_mask = k1; const Xbyak::Opmask table_mask = k1;
const u64 table_index = mcl::bit::replicate_element<u8, u64>(i * 16); const u8 table_index = u8(i * 16);
code.vpcmpeqb(table_mask, masked, code.MConst(xword, table_index, table_index)); code.vpcmpeqb(table_mask, masked, code.XmmBConst<8>(xword, i * 16));
if (table_index == 0 && is_defaults_zero) { if (table_index == 0 && is_defaults_zero) {
code.vpshufb(result | table_mask | T_z, xmm_table, indicies); code.vpshufb(result | table_mask | T_z, xmm_table, indicies);
@ -4733,21 +4733,21 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();
code.movaps(masked, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.movaps(masked, code.XmmBConst<8>(xword, 0xF0));
code.pand(masked, indicies); code.pand(masked, indicies);
for (size_t i = 0; i < table_size; ++i) { for (size_t i = 0; i < table_size; ++i) {
const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]); const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]);
const u64 table_index = mcl::bit::replicate_element<u8, u64>(i * 16); const u8 table_index = u8(i * 16);
if (table_index == 0) { if (table_index == 0) {
code.pxor(xmm0, xmm0); code.pxor(xmm0, xmm0);
code.pcmpeqb(xmm0, masked); code.pcmpeqb(xmm0, masked);
} else if (code.HasHostFeature(HostFeature::AVX)) { } else if (code.HasHostFeature(HostFeature::AVX)) {
code.vpcmpeqb(xmm0, masked, code.MConst(xword, table_index, table_index)); code.vpcmpeqb(xmm0, masked, code.XmmBConst<8>(xword, table_index));
} else { } else {
code.movaps(xmm0, code.MConst(xword, table_index, table_index)); code.movaps(xmm0, code.XmmBConst<8>(xword, table_index));
code.pcmpeqb(xmm0, masked); code.pcmpeqb(xmm0, masked);
} }
code.pshufb(xmm_table, indicies); code.pshufb(xmm_table, indicies);
@ -4805,11 +4805,11 @@ void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) {
const bool part = args[2].GetImmediateU1(); const bool part = args[2].GetImmediateU1();
if (!part) { if (!part) {
code.pand(lower, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lower, code.XmmBConst<16>(xword, 0x00FF));
code.psllw(upper, 8); code.psllw(upper, 8);
} else { } else {
code.psrlw(lower, 8); code.psrlw(lower, 8);
code.pand(upper, code.MConst(xword, 0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00)); code.pand(upper, code.XmmBConst<16>(xword, 0xFF00));
} }
code.por(lower, upper); code.por(lower, upper);
@ -4824,11 +4824,11 @@ void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) {
const bool part = args[2].GetImmediateU1(); const bool part = args[2].GetImmediateU1();
if (!part) { if (!part) {
code.pand(lower, code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF)); code.pand(lower, code.XmmBConst<32>(xword, 0x0000FFFF));
code.pslld(upper, 16); code.pslld(upper, 16);
} else { } else {
code.psrld(lower, 16); code.psrld(lower, 16);
code.pand(upper, code.MConst(xword, 0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000)); code.pand(upper, code.XmmBConst<32>(xword, 0xFFFF0000));
} }
code.por(lower, upper); code.por(lower, upper);
@ -4899,7 +4899,7 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx,
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
code.movdqa(temp, code.MConst(xword, 0x8000000080000000, 0x8000000080000000)); code.movdqa(temp, code.XmmBConst<32>(xword, 0x80000000));
code.pxor(x, temp); code.pxor(x, temp);
code.pxor(y, temp); code.pxor(y, temp);
code.movdqa(temp, x); code.movdqa(temp, x);

View file

@ -146,20 +146,12 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
template<size_t fsize> template<size_t fsize>
Xbyak::Address GetVectorOf(BlockOfCode& code, u64 value) { Xbyak::Address GetVectorOf(BlockOfCode& code, u64 value) {
if constexpr (fsize == 32) { return code.XmmBConst<fsize>(xword, value);
return code.MConst(xword, (value << 32) | value, (value << 32) | value);
} else {
return code.MConst(xword, value, value);
}
} }
template<size_t fsize, u64 value> template<size_t fsize, u64 value>
Xbyak::Address GetVectorOf(BlockOfCode& code) { Xbyak::Address GetVectorOf(BlockOfCode& code) {
if constexpr (fsize == 32) { return code.XmmBConst<fsize>(xword, value);
return code.MConst(xword, (value << 32) | value, (value << 32) | value);
} else {
return code.MConst(xword, value, value);
}
} }
template<size_t fsize> template<size_t fsize>
@ -215,7 +207,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
FpFixup::PosZero); FpFixup::PosZero);
FCODE(vfixupimmp)(result, result, code.MConst(ptr_b, u64(nan_to_zero)), u8(0)); FCODE(vfixupimmp)(result, result, code.XmmBConst<32>(ptr_b, nan_to_zero), u8(0));
} else if (code.HasHostFeature(HostFeature::AVX)) { } else if (code.HasHostFeature(HostFeature::AVX)) {
FCODE(vcmpordp)(nan_mask, result, result); FCODE(vcmpordp)(nan_mask, result, result);
FCODE(vandp)(result, result, nan_mask); FCODE(vandp)(result, result, nan_mask);
@ -239,9 +231,8 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xb
FpFixup::Norm_Src, FpFixup::Norm_Src,
FpFixup::Norm_Src, FpFixup::Norm_Src,
FpFixup::Norm_Src); FpFixup::Norm_Src);
constexpr u64 denormal_to_zero64 = mcl::bit::replicate_element<fsize, u64>(denormal_to_zero);
FCODE(vmovap)(tmp, code.MConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64))); FCODE(vmovap)(tmp, code.XmmBConst<fsize>(xword, denormal_to_zero));
for (const Xbyak::Xmm& xmm : to_daz) { for (const Xbyak::Xmm& xmm : to_daz) {
FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0)); FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
@ -589,12 +580,11 @@ template<size_t fsize>
void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mcl::unsigned_integer_of_size<fsize>; using FPT = mcl::unsigned_integer_of_size<fsize>;
constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u); constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);
constexpr u64 non_sign_mask64 = mcl::bit::replicate_element<fsize, u64>(non_sign_mask);
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, non_sign_mask64, non_sign_mask64); const Xbyak::Address mask = code.XmmBConst<fsize>(xword, non_sign_mask);
code.andps(a, mask); code.andps(a, mask);
@ -788,9 +778,9 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
code.vcvtudq2ps(xmm, xmm); code.vcvtudq2ps(xmm, xmm);
} else { } else {
const Xbyak::Address mem_4B000000 = code.MConst(xword, 0x4B0000004B000000, 0x4B0000004B000000); const Xbyak::Address mem_4B000000 = code.XmmBConst<32>(xword, 0x4B000000);
const Xbyak::Address mem_53000000 = code.MConst(xword, 0x5300000053000000, 0x5300000053000000); const Xbyak::Address mem_53000000 = code.XmmBConst<32>(xword, 0x53000000);
const Xbyak::Address mem_D3000080 = code.MConst(xword, 0xD3000080D3000080, 0xD3000080D3000080); const Xbyak::Address mem_D3000080 = code.XmmBConst<32>(xword, 0xD3000080);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
@ -801,7 +791,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
code.vaddps(xmm, xmm, mem_D3000080); code.vaddps(xmm, xmm, mem_D3000080);
code.vaddps(xmm, tmp, xmm); code.vaddps(xmm, tmp, xmm);
} else { } else {
const Xbyak::Address mem_0xFFFF = code.MConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF); const Xbyak::Address mem_0xFFFF = code.XmmBConst<32>(xword, 0x0000FFFF);
code.movdqa(tmp, mem_0xFFFF); code.movdqa(tmp, mem_0xFFFF);
@ -819,7 +809,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
} }
if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) { if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
code.pand(xmm, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)); code.pand(xmm, code.XmmBConst<32>(xword, 0x7FFFFFFF));
} }
}); });
@ -838,8 +828,8 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvtuqq2pd(xmm, xmm); code.vcvtuqq2pd(xmm, xmm);
} else { } else {
const Xbyak::Address unpack = code.MConst(xword, 0x4530000043300000, 0); const Xbyak::Address unpack = code.XmmConst(xword, 0x4530000043300000, 0);
const Xbyak::Address subtrahend = code.MConst(xword, 0x4330000000000000, 0x4530000000000000); const Xbyak::Address subtrahend = code.XmmConst(xword, 0x4330000000000000, 0x4530000000000000);
const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
@ -886,7 +876,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
} }
if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) { if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
code.pand(xmm, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF)); code.pand(xmm, code.XmmBConst<64>(xword, 0x7FFFFFFFFFFFFFFF));
} }
}); });
@ -1255,12 +1245,11 @@ template<size_t fsize>
void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mcl::unsigned_integer_of_size<fsize>; using FPT = mcl::unsigned_integer_of_size<fsize>;
constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask; constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
constexpr u64 sign_mask64 = mcl::bit::replicate_element<fsize, u64>(sign_mask);
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Address mask = code.MConst(xword, sign_mask64, sign_mask64); const Xbyak::Address mask = code.XmmBConst<fsize>(xword, sign_mask);
code.xorps(a, mask); code.xorps(a, mask);

View file

@ -72,7 +72,7 @@ enum class Op {
template<Op op, size_t esize> template<Op op, size_t esize>
void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
static_assert(esize == 32 || esize == 64); static_assert(esize == 32 || esize == 64);
constexpr u64 msb_mask = esize == 32 ? 0x8000000080000000 : 0x8000000000000000; constexpr u64 msb_mask = esize == 32 ? 0x80000000 : 0x8000000000000000;
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -97,7 +97,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.vpmovq2m(k1, xmm0); code.vpmovq2m(k1, xmm0);
} }
ICODE(vpsra)(result | k1, result, u8(esize - 1)); ICODE(vpsra)(result | k1, result, u8(esize - 1));
ICODE(vpxor)(result | k1, result, code.MConst(xword_b, msb_mask, msb_mask)); ICODE(vpxor)(result | k1, result, code.XmmBConst<esize>(xword_b, msb_mask));
code.ktestb(k1, k1); code.ktestb(k1, k1);
code.setnz(overflow); code.setnz(overflow);
@ -148,10 +148,10 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
if constexpr (esize == 64) { if constexpr (esize == 64) {
code.pshufd(tmp, tmp, 0b11110101); code.pshufd(tmp, tmp, 0b11110101);
} }
code.pxor(tmp, code.MConst(xword, msb_mask, msb_mask)); code.pxor(tmp, code.XmmBConst<esize>(xword, msb_mask));
if (code.HasHostFeature(HostFeature::SSE41)) { if (code.HasHostFeature(HostFeature::SSE41)) {
code.ptest(xmm0, code.MConst(xword, msb_mask, msb_mask)); code.ptest(xmm0, code.XmmBConst<esize>(xword, msb_mask));
} else { } else {
FCODE(movmskp)(overflow.cvt32(), xmm0); FCODE(movmskp)(overflow.cvt32(), xmm0);
code.test(overflow.cvt32(), overflow.cvt32()); code.test(overflow.cvt32(), overflow.cvt32());

View file

@ -541,7 +541,7 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
if (imm_value == 0) { if (imm_value == 0) {
MAYBE_AVX(xorps, reg, reg); MAYBE_AVX(xorps, reg, reg);
} else { } else {
MAYBE_AVX(movaps, reg, code.MConst(code.xword, imm_value)); MAYBE_AVX(movaps, reg, code.XmmBConst<64>(code.xword, imm_value));
} }
return host_loc; return host_loc;
} }

View file

@ -17,4 +17,12 @@ inline void Or(volatile u32* ptr, u32 value) {
#endif #endif
} }
// Atomically performs `*ptr &= value`. The value produced by the underlying
// intrinsic/builtin is discarded.
inline void And(volatile u32* ptr, u32 value) {
#ifdef _MSC_VER
// MSVC path: the intrinsic is called through a `volatile long*`, hence the cast.
_InterlockedAnd(reinterpret_cast<volatile long*>(ptr), value);
#else
// Non-MSVC path: compiler builtin with sequentially consistent ordering.
__atomic_and_fetch(ptr, value, __ATOMIC_SEQ_CST);
#endif
}
} // namespace Dynarmic::Atomic } // namespace Dynarmic::Atomic

View file

@ -57,10 +57,15 @@ public:
/** /**
* Stops execution in Jit::Run. * Stops execution in Jit::Run.
* Can only be called from a callback.
*/ */
void HaltExecution(HaltReason hr = HaltReason::UserDefined1); void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
/**
* Clears a halt reason from flags.
* Warning: Only use this if you're sure this won't introduce races.
*/
void ClearHalt(HaltReason hr = HaltReason::UserDefined1);
/// View and modify registers. /// View and modify registers.
std::array<std::uint32_t, 16>& Regs(); std::array<std::uint32_t, 16>& Regs();
const std::array<std::uint32_t, 16>& Regs() const; const std::array<std::uint32_t, 16>& Regs() const;

View file

@ -58,10 +58,15 @@ public:
/** /**
* Stops execution in Jit::Run. * Stops execution in Jit::Run.
* Can only be called from a callback.
*/ */
void HaltExecution(HaltReason hr = HaltReason::UserDefined1); void HaltExecution(HaltReason hr = HaltReason::UserDefined1);
/**
* Clears a halt reason from flags.
* Warning: Only use this if you're sure this won't introduce races.
*/
void ClearHalt(HaltReason hr = HaltReason::UserDefined1);
/// Read Stack Pointer /// Read Stack Pointer
std::uint64_t GetSP() const; std::uint64_t GetSP() const;
/// Modify Stack Pointer /// Modify Stack Pointer

View file

@ -0,0 +1,113 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <catch2/catch.hpp>
#include "./testenv.h"
#include "dynarmic/interface/A64/a64.h"
using namespace Dynarmic;
TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have any patching requirements", "[a64]") {
    A64TestEnv env;
    A64::UserConfig conf{&env};
    A64::Jit jit{conf};

    REQUIRE(conf.HasOptimization(OptimizationFlag::FastDispatch));

    // Guest program, placed at address 100 (the second block starts at 108).
    env.code_mem_start_address = 100;
    env.code_mem.clear();
    env.code_mem.emplace_back(0xd2800d80);  // MOV X0, 108
    env.code_mem.emplace_back(0xd61f0000);  // BR X0
    env.code_mem.emplace_back(0xd2800540);  // MOV X0, 42
    env.code_mem.emplace_back(0x14000000);  // B .

    // Executes a 4-tick slice starting at `pc` and reports X0 afterwards.
    const auto run_from = [&](std::uint64_t pc) {
        jit.SetPC(pc);
        env.ticks_left = 4;
        jit.Run();
        return jit.GetRegister(0);
    };

    // Two consecutive runs of the unmodified program.
    REQUIRE(run_from(100) == 42);
    REQUIRE(run_from(100) == 42);

    // Invalidating the branch target while its code is unchanged keeps the result.
    jit.InvalidateCacheRange(108, 4);
    REQUIRE(run_from(100) == 42);

    // Guest memory is rewritten, but no invalidation is issued yet:
    // the test expects the previous result here.
    env.code_mem[2] = 0xd28008a0;  // MOV X0, 69
    REQUIRE(run_from(100) == 42);

    // After invalidating the rewritten range the new instruction must be observed,
    // both on the first run and on the run after it.
    jit.InvalidateCacheRange(108, 4);
    REQUIRE(run_from(100) == 69);
    REQUIRE(run_from(100) == 69);
}
TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have any patching requirements 2", "[a64]") {
    A64TestEnv env;
    A64::UserConfig conf{&env};
    A64::Jit jit{conf};

    REQUIRE(conf.HasOptimization(OptimizationFlag::FastDispatch));

    // Guest program; execution starts at address 0 and branches to address 8.
    env.code_mem.emplace_back(0xd2800100);  // MOV X0, 8
    env.code_mem.emplace_back(0xd61f0000);  // BR X0
    env.code_mem.emplace_back(0xd2800540);  // MOV X0, 42
    env.code_mem.emplace_back(0x14000000);  // B .

    // Executes a 4-tick slice starting at `pc` and reports X0 afterwards.
    const auto run_from = [&](std::uint64_t pc) {
        jit.SetPC(pc);
        env.ticks_left = 4;
        jit.Run();
        return jit.GetRegister(0);
    };

    // Two consecutive runs of the unmodified program.
    REQUIRE(run_from(0) == 42);
    REQUIRE(run_from(0) == 42);

    // Invalidating the branch target while its code is unchanged keeps the result.
    jit.InvalidateCacheRange(8, 4);
    REQUIRE(run_from(0) == 42);

    // Guest memory is rewritten, but no invalidation is issued yet:
    // the test expects the previous result here.
    env.code_mem[2] = 0xd28008a0;  // MOV X0, 69
    REQUIRE(run_from(0) == 42);

    // After invalidating the rewritten range the new instruction must be observed,
    // both on the first run and on the run after it.
    jit.InvalidateCacheRange(8, 4);
    REQUIRE(run_from(0) == 69);
    REQUIRE(run_from(0) == 69);
}

View file

@ -20,6 +20,8 @@ endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS) if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic_tests PRIVATE target_sources(dynarmic_tests PRIVATE
A64/a64.cpp A64/a64.cpp
A64/misaligned_page_table.cpp
A64/test_invalidation.cpp
A64/testenv.h A64/testenv.h
) )
endif() endif()
@ -44,7 +46,6 @@ if (DYNARMIC_TESTS_USE_UNICORN)
if ("A64" IN_LIST DYNARMIC_FRONTENDS) if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic_tests PRIVATE target_sources(dynarmic_tests PRIVATE
A64/fuzz_with_unicorn.cpp A64/fuzz_with_unicorn.cpp
A64/misaligned_page_table.cpp
A64/verify_unicorn.cpp A64/verify_unicorn.cpp
unicorn_emu/a64_unicorn.cpp unicorn_emu/a64_unicorn.cpp
unicorn_emu/a64_unicorn.h unicorn_emu/a64_unicorn.h